From 8e923b0ad9b9b1f89413008a5acd8408c67841f6 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 13 Apr 2021 16:40:21 -0700 Subject: [PATCH] Initial public commit of regalloc2. --- .empty | 0 .gitignore | 4 + Cargo.toml | 27 + LICENSE | 220 ++ README.md | 159 ++ benches/regalloc.rs | 56 + fuzz/.gitignore | 4 + fuzz/Cargo.toml | 54 + fuzz/fuzz_targets/domtree.rs | 128 + fuzz/fuzz_targets/ion.rs | 11 + fuzz/fuzz_targets/ion_checker.rs | 39 + fuzz/fuzz_targets/moves.rs | 76 + fuzz/fuzz_targets/ssagen.rs | 35 + src/bin/test.rs | 45 + src/bitvec.rs | 139 ++ src/cfg.rs | 110 + src/checker.rs | 615 +++++ src/domtree.rs | 118 + src/fuzzing/func.rs | 542 +++++ src/fuzzing/mod.rs | 3 + src/index.rs | 176 ++ src/ion/LICENSE | 373 +++ src/ion/mod.rs | 3763 ++++++++++++++++++++++++++++++ src/lib.rs | 780 +++++++ src/moves.rs | 199 ++ src/postorder.rs | 51 + src/ssa.rs | 87 + 27 files changed, 7814 insertions(+) delete mode 100644 .empty create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 benches/regalloc.rs create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/fuzz_targets/domtree.rs create mode 100644 fuzz/fuzz_targets/ion.rs create mode 100644 fuzz/fuzz_targets/ion_checker.rs create mode 100644 fuzz/fuzz_targets/moves.rs create mode 100644 fuzz/fuzz_targets/ssagen.rs create mode 100644 src/bin/test.rs create mode 100644 src/bitvec.rs create mode 100644 src/cfg.rs create mode 100644 src/checker.rs create mode 100644 src/domtree.rs create mode 100644 src/fuzzing/func.rs create mode 100644 src/fuzzing/mod.rs create mode 100644 src/index.rs create mode 100644 src/ion/LICENSE create mode 100644 src/ion/mod.rs create mode 100644 src/lib.rs create mode 100644 src/moves.rs create mode 100644 src/postorder.rs create mode 100644 src/ssa.rs diff --git a/.empty b/.empty deleted file mode 100644 index e69de29b..00000000 diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..aadc1161 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +Cargo.lock +target/ +.*.swp +*~ diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..7e32c7c1 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "regalloc2" +version = "0.0.1" +authors = ["Chris Fallin ", "Mozilla SpiderMonkey Developers"] +edition = "2018" +license = "Apache-2.0 WITH LLVM-exception AND MPL-2.0" +description = "Backtracking register allocator ported from IonMonkey" +repository = "https://github.com/cfallin/regalloc2" + +[dependencies] +log = { version = "0.4.8", default-features = false } +smallvec = "1.6.1" +# keep this in sync with libfuzzer_sys's crate version: +arbitrary = "^0.4.6" +rand = "0.8" +rand_chacha = "0.3" +env_logger = "*" + +[dev-dependencies] +criterion = "0.3" + +[profile.release] +debug = true + +[[bench]] +name = "regalloc" +harness = false diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..f9d81955 --- /dev/null +++ b/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + diff --git a/README.md b/README.md new file mode 100644 index 00000000..e755c4c0 --- /dev/null +++ b/README.md @@ -0,0 +1,159 @@ +## regalloc2: another register allocator + +This is a register allocator that started life as, and is about 75% +still, a port of IonMonkey's backtracking register allocator to +Rust. The data structures and invariants have been simplified a little +bit, and the interfaces made a little more generic and reusable. In +addition, it contains substantial amounts of testing infrastructure +(fuzzing harnesses and checkers) that does not exist in the original +IonMonkey allocator. + +### Design Overview + +TODO + +- SSA with blockparams + +- Operands with constraints, and clobbers, and reused regs; contrast + with regalloc.rs approach of vregs and pregs and many moves that get + coalesced/elided + +### Differences from IonMonkey Backtracking Allocator + +There are a number of differences between the [IonMonkey +allocator](https://searchfox.org/mozilla-central/source/js/src/jit/BacktrackingAllocator.cpp) +and this one: + +* Most significantly, there are [many different + fuzz targets](fuzz/fuzz_targets/) that exercise the allocator, including a full symbolic + checker (`ion_checker` target) based on the [symbolic checker in + regalloc.rs](https://cfallin.org/blog/2021/03/15/cranelift-isel-3/) + and, e.g., a targeted fuzzer for the parallel move-resolution + algorithm (`moves`) and the SSA generator used for generating cases + for the other fuzz targets (`ssagen`). + +* The data-structure invariants are simplified. While the IonMonkey + allocator allowed for LiveRanges and Bundles to overlap in certain + cases, this allocator sticks to a strict invariant: ranges do not + overlap in bundles, and bundles do not overlap. There are other + examples too: e.g., the definition of minimal bundles is very simple + and does not depend on scanning the code at all. In general, we + should be able to state simple invariants and see by inspection (as + well as fuzzing -- see above) that they hold. + +* Many of the algorithms in the IonMonkey allocator are built with + helper functions that do linear scans. These "small quadratic" loops + are likely not a huge issue in practice, but nevertheless have the + potential to be in corner cases. As much as possible, all work in + this allocator is done in linear scans. For example, bundle + splitting is done in a single compound scan over a bundle, ranges in + the bundle, and a sorted list of split-points. + +* There are novel schemes for solving certain interesting design + challenges.
One example: in IonMonkey, liveranges are connected + across blocks by, when reaching one end of a control-flow edge in a + scan, doing a lookup of the allocation at the other end. This is in + principle a linear lookup (so quadratic overall). We instead + generate a list of "half-moves", keyed on the edge and from/to + vregs, with each holding one of the allocations. By sorting and then + scanning this list, we can generate all edge moves in one linear + scan. There are a number of other examples of simplifications: for + example, we handle multiple conflicting + physical-register-constrained uses of a vreg in a single instruction + by recording a copy to do in a side-table, then removing constraints + for the core regalloc. Ion instead has to tweak its definition of + minimal bundles and create two liveranges that overlap (!) to + represent the two uses. + +* Using block parameters rather than phi-nodes significantly + simplifies handling of inter-block data movement. IonMonkey had to + special-case phis in many ways because they are actually quite + weird: their uses happen semantically in other blocks, and their + defs happen in parallel at the top of the block. Block parameters + naturally and explicitly represent these semantics in a direct way. + +* The allocator supports irreducible control flow and arbitrary block + ordering (its only CFG requirement is that critical edges are + split). It handles loops during live-range computation in a way that + is similar in spirit to IonMonkey's allocator -- in a single pass, + when we discover a loop, we just mark the whole loop as a liverange + for values live at the top of the loop -- but we find the loop body + without the fixpoint workqueue loop that IonMonkey uses, instead + doing a single linear scan for backedges and finding the minimal + extent that covers all intermingled loops. In order to support + arbitrary block order and irreducible control flow, we relax the + invariant that the first liverange for a vreg always starts at its + def; instead, the def can happen anywhere, and a liverange may + overapproximate. It turns out this is not too hard to handle and is + a more robust invariant. (It also means that non-SSA code *may* not + be too hard to adapt to, though I haven't seriously thought about + this.) + +### Rough Performance Comparison with Regalloc.rs + +The allocator has not yet been wired up to a suitable compiler backend +(such as Cranelift) to perform a true apples-to-apples compile-time +and runtime comparison. However, we can get some idea of compile speed +by running suitable test cases through the allocator and measuring +*throughput*: that is, instructions per second for which registers are +allocated.
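(As a concrete, hedged illustration of what "throughput" means here: it is simply `Function::insts()` divided by the wall-clock time spent in `ion::run`. A minimal sketch using the fuzzing `Func` generator and `machine_env` from this patch -- but plain `std::time::Instant` rather than the Criterion harness in `benches/regalloc.rs` -- might look like the following; the helper name is illustrative only.)

```rust
use std::time::Instant;

use regalloc2::fuzzing::func::{machine_env, Func};
use regalloc2::{ion, Function};

/// Rough instructions-allocated-per-second measurement for one generated
/// function. (Illustrative sketch only; the real benchmark lives in
/// `benches/regalloc.rs` and uses Criterion's `Throughput::Elements`.)
fn rough_throughput(func: &Func, iters: u32) -> f64 {
    let env = machine_env();
    let start = Instant::now();
    for _ in 0..iters {
        // Clone per run, as the Criterion benchmark does, so the comparison
        // with regalloc.rs (which must clone its Func on every alloc) is fair.
        let f = func.clone();
        ion::run(&f, &env).expect("regalloc did not succeed");
    }
    let secs = start.elapsed().as_secs_f64();
    (func.insts() as f64 * iters as f64) / secs
}
```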
+ +To do so, I measured the `qsort2` benchmark in +[regalloc.rs](https://github.com/bytecodealliance/regalloc.rs), +register-allocated with default options in that crate's backtracking +allocator, using the Criterion benchmark framework to measure ~620K +instructions per second: + + +```plain +benches/0 time: [365.68 us 367.36 us 369.04 us] + thrpt: [617.82 Kelem/s 620.65 Kelem/s 623.49 Kelem/s] +``` + +I then measured three different fuzztest-SSA-generator test cases in +this allocator, `regalloc2`, measuring between 1.05M and 2.3M +instructions per second (closer to the former for larger functions): + +```plain +==== 459 instructions +benches/0 time: [424.46 us 425.65 us 426.59 us] + thrpt: [1.0760 Melem/s 1.0784 Melem/s 1.0814 Melem/s] + +==== 225 instructions +benches/1 time: [213.05 us 213.28 us 213.54 us] + thrpt: [1.0537 Melem/s 1.0549 Melem/s 1.0561 Melem/s] + +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild +==== 21 instructions +benches/2 time: [9.0495 us 9.0571 us 9.0641 us] + thrpt: [2.3168 Melem/s 2.3186 Melem/s 2.3206 Melem/s] + +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe +``` + +Though not apples-to-apples (SSA vs. non-SSA, completely different +code only with similar length), this is at least some evidence that +`regalloc2` is likely to lead to at least a compile-time improvement +when used in e.g. Cranelift. + +### License + +Unless otherwise specified, code in this crate is licensed under the Apache 2.0 +License with LLVM Exception. This license text can be found in the file +`LICENSE`. + +Files in the `src/ion/` directory are directly ported from original C++ code in +IonMonkey, a part of the Firefox codebase. Parts of `src/lib.rs` are also +definitions that are directly translated from this original code. As a result, +these files are derivative works and are covered by the Mozilla Public License +(MPL) 2.0, as described in license headers in those files. Please see the +notices in relevant files for links to the original IonMonkey source files from +which they have been translated/derived. The MPL text can be found in +`src/ion/LICENSE`. + +Parts of the code are derived from regalloc.rs: in particular, +`src/checker.rs` and `src/domtree.rs`. This crate has the same license +as regalloc.rs, so the license on these files does not differ. diff --git a/benches/regalloc.rs b/benches/regalloc.rs new file mode 100644 index 00000000..85cee8c5 --- /dev/null +++ b/benches/regalloc.rs @@ -0,0 +1,56 @@ +//! Criterion-based benchmark target that computes insts/second for +//! arbitrary inputs. 
+ +use arbitrary::{Arbitrary, Unstructured}; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; +use regalloc2::fuzzing::func::{machine_env, Func}; +use regalloc2::ion; +use regalloc2::Function; + +fn create_random_func(seed: u64, size: usize) -> Func { + let mut bytes: Vec = vec![]; + bytes.resize(size, 0); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + rng.fill(&mut bytes[..]); + loop { + let mut u = Unstructured::new(&bytes[..]); + match Func::arbitrary(&mut u) { + Ok(f) => { + return f; + } + Err(arbitrary::Error::NotEnoughData) => { + let len = bytes.len(); + bytes.resize(len + 1024, 0); + rng.fill(&mut bytes[len..]); + } + Err(e) => panic!("unexpected error: {:?}", e), + } + } +} + +fn run_regalloc(c: &mut Criterion) { + const SIZE: usize = 1000 * 1000; + env_logger::init(); + let env = machine_env(); + let mut group = c.benchmark_group("benches"); + for iter in 0..3 { + let func = create_random_func(iter, SIZE); + eprintln!("==== {} instructions", func.insts()); + group.throughput(Throughput::Elements(func.insts() as u64)); + group.bench_with_input(BenchmarkId::from_parameter(iter), &iter, |b, _| { + b.iter(|| { + // For fair comparison with regalloc.rs, which needs + // to clone its Func on every alloc, we clone + // too. Seems to make a few percent difference. + let func = func.clone(); + ion::run(&func, &env).expect("regalloc did not succeed"); + }); + }); + } + group.finish(); +} + +criterion_group!(benches, run_regalloc); +criterion_main!(benches); diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 00000000..572e03bd --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,4 @@ + +target +corpus +artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 00000000..e0eec8da --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,54 @@ + +[package] +name = "regalloc2-fuzz" +version = "0.0.0" +authors = ["Chris Fallin "] +license = "MPL-2.0 AND Apache-2.0 WITH LLVM-exception" +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.3" +arbitrary = { version = "^0.4.6", features = ["derive"] } +log = { version = "0.4.8", default-features = false } +env_logger = "0.8.3" + +[dependencies.regalloc2] +path = ".." 
+ +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "domtree" +path = "fuzz_targets/domtree.rs" +test = false +doc = false + +[[bin]] +name = "ssagen" +path = "fuzz_targets/ssagen.rs" +test = false +doc = false + +[[bin]] +name = "ion" +path = "fuzz_targets/ion.rs" +test = false +doc = false + +[[bin]] +name = "moves" +path = "fuzz_targets/moves.rs" +test = false +doc = false + +[[bin]] +name = "ion_checker" +path = "fuzz_targets/ion_checker.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/domtree.rs b/fuzz/fuzz_targets/domtree.rs new file mode 100644 index 00000000..5923befb --- /dev/null +++ b/fuzz/fuzz_targets/domtree.rs @@ -0,0 +1,128 @@ +#![no_main] +use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; +use libfuzzer_sys::fuzz_target; +use std::collections::HashSet; + +use regalloc2::{domtree, postorder, Block}; + +#[derive(Clone, Debug)] +struct CFG { + num_blocks: usize, + preds: Vec>, + succs: Vec>, +} + +impl Arbitrary for CFG { + fn arbitrary(u: &mut Unstructured) -> Result { + let num_blocks = u.int_in_range(1..=1000)?; + let mut succs = vec![]; + for _ in 0..num_blocks { + let mut block_succs = vec![]; + for _ in 0..u.int_in_range(0..=5)? { + block_succs.push(Block::new(u.int_in_range(0..=(num_blocks - 1))?)); + } + succs.push(block_succs); + } + let mut preds = vec![]; + for _ in 0..num_blocks { + preds.push(vec![]); + } + for from in 0..num_blocks { + for succ in &succs[from] { + preds[succ.index()].push(Block::new(from)); + } + } + Ok(CFG { + num_blocks, + preds, + succs, + }) + } +} + +#[derive(Clone, Debug)] +struct Path { + blocks: Vec, +} + +impl Path { + fn choose_from_cfg(cfg: &CFG, u: &mut Unstructured) -> Result { + let succs = u.int_in_range(0..=(2 * cfg.num_blocks))?; + let mut block = Block::new(0); + let mut blocks = vec![]; + blocks.push(block); + for _ in 0..succs { + if cfg.succs[block.index()].is_empty() { + break; + } + block = *u.choose(&cfg.succs[block.index()])?; + blocks.push(block); + } + Ok(Path { blocks }) + } +} + +fn check_idom_violations(idom: &[Block], path: &Path) { + // "a dom b" means that any path from the entry block through the CFG that + // contains a and b will contain a before b. + // + // To test this, for any given block b_i, we have the set S of b_0 .. b_{i-1}, + // and we walk up the domtree from b_i to get all blocks that dominate b_i; + // each such block must appear in S. (Otherwise, we have a counterexample + // for which dominance says it should appear in the path prefix, but it does + // not.) + let mut visited = HashSet::new(); + visited.insert(Block::new(0)); + for block in &path.blocks { + let mut parent = idom[block.index()]; + let mut domset = HashSet::new(); + domset.insert(*block); + loop { + assert!(parent.is_valid()); + assert!(visited.contains(&parent)); + domset.insert(parent); + let next = idom[parent.index()]; + if next == parent { + break; + } + parent = next; + } + // Check that `dominates()` returns true for every block in domset, + // and false for every other block. 
+ for domblock in 0..idom.len() { + let domblock = Block::new(domblock); + assert_eq!(domset.contains(&domblock), domtree::dominates(idom, domblock, *block)); + } + visited.insert(*block); + } +} + +#[derive(Clone, Debug)] +struct TestCase { + cfg: CFG, + path: Path, +} + +impl Arbitrary for TestCase { + fn arbitrary(u: &mut Unstructured) -> Result { + let cfg = CFG::arbitrary(u)?; + let path = Path::choose_from_cfg(&cfg, u)?; + Ok(TestCase { + cfg, + path, + }) + } +} + +fuzz_target!(|testcase: TestCase| { + let postord = postorder::calculate(testcase.cfg.num_blocks, Block::new(0), |block| { + &testcase.cfg.succs[block.index()] + }); + let idom = domtree::calculate( + testcase.cfg.num_blocks, + |block| &testcase.cfg.preds[block.index()], + &postord[..], + Block::new(0), + ); + check_idom_violations(&idom[..], &testcase.path); +}); diff --git a/fuzz/fuzz_targets/ion.rs b/fuzz/fuzz_targets/ion.rs new file mode 100644 index 00000000..dc4a3423 --- /dev/null +++ b/fuzz/fuzz_targets/ion.rs @@ -0,0 +1,11 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +use regalloc2::fuzzing::func::Func; + +fuzz_target!(|func: Func| { + let _ = env_logger::try_init(); + log::debug!("func:\n{:?}", func); + let env = regalloc2::fuzzing::func::machine_env(); + let _out = regalloc2::ion::run(&func, &env).expect("regalloc did not succeed"); +}); diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs new file mode 100644 index 00000000..e3ce1dc7 --- /dev/null +++ b/fuzz/fuzz_targets/ion_checker.rs @@ -0,0 +1,39 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured, Result}; + +use regalloc2::fuzzing::func::{Func, Options}; +use regalloc2::checker::Checker; + +#[derive(Clone, Debug)] +struct TestCase { + func: Func, +} + +impl Arbitrary for TestCase { + fn arbitrary(u: &mut Unstructured) -> Result { + Ok(TestCase { + func: Func::arbitrary_with_options(u, &Options { + reused_inputs: true, + fixed_regs: true, + clobbers: true, + control_flow: true, + reducible: false, + block_params: true, + always_local_uses: false, + })?, + }) + } +} + +fuzz_target!(|testcase: TestCase| { + let func = testcase.func; + let _ = env_logger::try_init(); + log::debug!("func:\n{:?}", func); + let env = regalloc2::fuzzing::func::machine_env(); + let out = regalloc2::ion::run(&func, &env).expect("regalloc did not succeed"); + + let mut checker = Checker::new(&func); + checker.prepare(&out); + checker.run().expect("checker failed"); +}); diff --git a/fuzz/fuzz_targets/moves.rs b/fuzz/fuzz_targets/moves.rs new file mode 100644 index 00000000..a719f7c4 --- /dev/null +++ b/fuzz/fuzz_targets/moves.rs @@ -0,0 +1,76 @@ +#![no_main] +use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; +use libfuzzer_sys::fuzz_target; + +use regalloc2::moves::ParallelMoves; +use regalloc2::{Allocation, PReg, RegClass}; +use std::collections::HashSet; + +#[derive(Clone, Debug)] +struct TestCase { + moves: Vec<(Allocation, Allocation)>, +} + +impl Arbitrary for TestCase { + fn arbitrary(u: &mut Unstructured) -> Result { + let mut ret = TestCase { moves: vec![] }; + let mut written = HashSet::new(); + while bool::arbitrary(u)? 
{ + let reg1 = u.int_in_range(0..=30)?; + let reg2 = u.int_in_range(0..=30)?; + if written.contains(®2) { + break; + } + written.insert(reg2); + ret.moves.push(( + Allocation::reg(PReg::new(reg1, RegClass::Int)), + Allocation::reg(PReg::new(reg2, RegClass::Int)), + )); + } + Ok(ret) + } +} + +fuzz_target!(|testcase: TestCase| { + let _ = env_logger::try_init(); + let scratch = Allocation::reg(PReg::new(31, RegClass::Int)); + let mut par = ParallelMoves::new(scratch); + for &(src, dst) in &testcase.moves { + par.add(src, dst); + } + let moves = par.resolve(); + + // Compute the final source reg for each dest reg in the original + // parallel-move set. + let mut final_src_per_dest: Vec> = vec![None; 32]; + for &(src, dst) in &testcase.moves { + if let (Some(preg_src), Some(preg_dst)) = (src.as_reg(), dst.as_reg()) { + final_src_per_dest[preg_dst.hw_enc()] = Some(preg_src.hw_enc()); + } + } + + // Simulate the sequence of moves. + let mut regfile: Vec> = vec![None; 32]; + for i in 0..32 { + regfile[i] = Some(i); + } + for (src, dst) in moves { + if let (Some(preg_src), Some(preg_dst)) = (src.as_reg(), dst.as_reg()) { + let data = regfile[preg_src.hw_enc()]; + regfile[preg_dst.hw_enc()] = data; + } else { + panic!("Bad allocation in move list"); + } + } + + // Assert that the expected register-moves occurred. + // N.B.: range up to 31 (not 32) to skip scratch register. + for i in 0..31 { + if let Some(orig_src) = final_src_per_dest[i] { + assert_eq!(regfile[i], Some(orig_src)); + } else { + // Should be untouched. + assert_eq!(regfile[i], Some(i)); + } + } +}); diff --git a/fuzz/fuzz_targets/ssagen.rs b/fuzz/fuzz_targets/ssagen.rs new file mode 100644 index 00000000..e69e71a6 --- /dev/null +++ b/fuzz/fuzz_targets/ssagen.rs @@ -0,0 +1,35 @@ +#![no_main] +use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; +use libfuzzer_sys::fuzz_target; + +use regalloc2::cfg::CFGInfo; +use regalloc2::fuzzing::func::{Func, Options}; +use regalloc2::ssa::validate_ssa; + +#[derive(Debug)] +struct TestCase { + f: Func, +} + +impl Arbitrary for TestCase { + fn arbitrary(u: &mut Unstructured) -> Result { + Ok(TestCase { + f: Func::arbitrary_with_options( + u, + &Options { + reused_inputs: true, + fixed_regs: true, + clobbers: true, + control_flow: true, + reducible: false, + always_local_uses: false, + }, + )?, + }) + } +} + +fuzz_target!(|t: TestCase| { + let cfginfo = CFGInfo::new(&t.f); + validate_ssa(&t.f, &cfginfo).expect("invalid SSA"); +}); diff --git a/src/bin/test.rs b/src/bin/test.rs new file mode 100644 index 00000000..6d7c7de6 --- /dev/null +++ b/src/bin/test.rs @@ -0,0 +1,45 @@ +use arbitrary::{Arbitrary, Unstructured}; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; +use regalloc2::fuzzing::func::{machine_env, Func}; +use regalloc2::ion; +use regalloc2::Function; + +fn create_random_func(seed: u64, size: usize) -> Func { + let mut bytes: Vec = vec![]; + bytes.resize(size, 0); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + rng.fill(&mut bytes[..]); + loop { + let mut u = Unstructured::new(&bytes[..]); + match Func::arbitrary(&mut u) { + Ok(f) => { + return f; + } + Err(arbitrary::Error::NotEnoughData) => { + let len = bytes.len(); + bytes.resize(len + 1024, 0); + rng.fill(&mut bytes[len..]); + } + Err(e) => panic!("unexpected error: {:?}", e), + } + } +} + +fn main() { + const SIZE: usize = 1000 * 1000; + env_logger::init(); + let env = machine_env(); + for iter in 0..3 { + let func = create_random_func(iter, SIZE); + eprintln!("==== {} instructions", func.insts()); + let 
mut stats: ion::Stats = ion::Stats::default(); + for i in 0..1000 { + let out = ion::run(&func, &env).expect("regalloc did not succeed"); + if i == 0 { + stats = out.stats; + } + } + eprintln!("Stats: {:?}", stats); + } +} diff --git a/src/bitvec.rs b/src/bitvec.rs new file mode 100644 index 00000000..ce3be7cf --- /dev/null +++ b/src/bitvec.rs @@ -0,0 +1,139 @@ +//! Bit vectors. + +use smallvec::{smallvec, SmallVec}; + +/// A conceptually infinite-length bitvector that allows bitwise operations and +/// iteration over set bits efficiently. +#[derive(Clone, Debug)] +pub struct BitVec { + bits: SmallVec<[u64; 2]>, +} + +const BITS_PER_WORD: usize = 64; + +impl BitVec { + pub fn new() -> Self { + Self { bits: smallvec![] } + } + + pub fn with_capacity(len: usize) -> Self { + let words = (len + BITS_PER_WORD - 1) / BITS_PER_WORD; + Self { + bits: SmallVec::with_capacity(words), + } + } + + #[inline(never)] + fn ensure_idx(&mut self, word: usize) { + let mut target_len = std::cmp::max(2, self.bits.len()); + while word >= target_len { + target_len *= 2; + } + self.bits.resize(target_len, 0); + } + + #[inline(always)] + pub fn set(&mut self, idx: usize, val: bool) { + let word = idx / BITS_PER_WORD; + let bit = idx % BITS_PER_WORD; + if val { + if word >= self.bits.len() { + self.ensure_idx(word); + } + self.bits[word] |= 1 << bit; + } else { + if word < self.bits.len() { + self.bits[word] &= !(1 << bit); + } + } + } + + #[inline(always)] + pub fn get(&mut self, idx: usize) -> bool { + let word = idx / BITS_PER_WORD; + let bit = idx % BITS_PER_WORD; + if word >= self.bits.len() { + false + } else { + (self.bits[word] & (1 << bit)) != 0 + } + } + + pub fn or(&mut self, other: &Self) { + if other.bits.is_empty() { + return; + } + let last_idx = other.bits.len() - 1; + self.ensure_idx(last_idx); + + for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) { + *self_word |= *other_word; + } + } + + pub fn and(&mut self, other: &Self) { + if other.bits.len() < self.bits.len() { + self.bits.truncate(other.bits.len()); + } + + for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) { + *self_word &= *other_word; + } + } + + pub fn iter<'a>(&'a self) -> SetBitsIter<'a> { + let cur_word = if self.bits.len() > 0 { self.bits[0] } else { 0 }; + SetBitsIter { + words: &self.bits[..], + word_idx: 0, + cur_word, + } + } +} + +pub struct SetBitsIter<'a> { + words: &'a [u64], + word_idx: usize, + cur_word: u64, +} + +impl<'a> Iterator for SetBitsIter<'a> { + type Item = usize; + fn next(&mut self) -> Option { + while self.cur_word == 0 { + if self.word_idx + 1 >= self.words.len() { + return None; + } + self.word_idx += 1; + self.cur_word = self.words[self.word_idx]; + } + let bitidx = self.cur_word.trailing_zeros(); + self.cur_word &= !(1 << bitidx); + Some(self.word_idx * BITS_PER_WORD + bitidx as usize) + } +} + +#[cfg(test)] +mod test { + use super::BitVec; + + #[test] + fn test_set_bits_iter() { + let mut vec = BitVec::new(); + let mut sum = 0; + for i in 0..1024 { + if i % 17 == 0 { + vec.set(i, true); + sum += i; + } + } + + let mut checksum = 0; + for bit in vec.iter() { + assert!(bit % 17 == 0); + checksum += bit; + } + + assert_eq!(sum, checksum); + } +} diff --git a/src/cfg.rs b/src/cfg.rs new file mode 100644 index 00000000..4c838e78 --- /dev/null +++ b/src/cfg.rs @@ -0,0 +1,110 @@ +//! Lightweight CFG analyses. 
+ +use crate::{domtree, postorder, Block, Function, Inst, OperandKind, ProgPoint}; + +#[derive(Clone, Debug)] +pub struct CFGInfo { + /// Postorder traversal of blocks. + pub postorder: Vec, + /// Domtree parents, indexed by block. + pub domtree: Vec, + /// For each instruction, the block it belongs to. + pub insn_block: Vec, + /// For each vreg, the instruction that defines it, if any. + pub vreg_def_inst: Vec, + /// For each vreg, the block that defines it as a blockparam, if + /// any. (Every vreg must have a valid entry in either + /// `vreg_def_inst` or `vreg_def_blockparam`.) + pub vreg_def_blockparam: Vec<(Block, u32)>, + /// For each block, the first instruction. + pub block_entry: Vec, + /// For each block, the last instruction. + pub block_exit: Vec, + /// For each block, what is its position in its successor's preds, + /// if it has a single successor? + /// + /// (Because we require split critical edges, we always either have a single + /// successor (which itself may have multiple preds), or we have multiple + /// successors but each successor itself has only one pred; so we can store + /// just one value per block and always know any block's position in its + /// successors' preds lists.) + pub pred_pos: Vec, +} + +impl CFGInfo { + pub fn new(f: &F) -> CFGInfo { + let postorder = + postorder::calculate(f.blocks(), f.entry_block(), |block| f.block_succs(block)); + let domtree = domtree::calculate( + f.blocks(), + |block| f.block_preds(block), + &postorder[..], + f.entry_block(), + ); + let mut insn_block = vec![Block::invalid(); f.insts()]; + let mut vreg_def_inst = vec![Inst::invalid(); f.num_vregs()]; + let mut vreg_def_blockparam = vec![(Block::invalid(), 0); f.num_vregs()]; + let mut block_entry = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; + let mut block_exit = vec![ProgPoint::before(Inst::invalid()); f.blocks()]; + let mut pred_pos = vec![0; f.blocks()]; + + for block in 0..f.blocks() { + let block = Block::new(block); + for (i, param) in f.block_params(block).iter().enumerate() { + vreg_def_blockparam[param.vreg()] = (block, i as u32); + } + for inst in f.block_insns(block).iter() { + insn_block[inst.index()] = block; + for operand in f.inst_operands(inst) { + match operand.kind() { + OperandKind::Def => { + vreg_def_inst[operand.vreg().vreg()] = inst; + } + _ => {} + } + } + } + block_entry[block.index()] = ProgPoint::before(f.block_insns(block).first()); + block_exit[block.index()] = ProgPoint::after(f.block_insns(block).last()); + + if f.block_preds(block).len() > 1 { + for (i, &pred) in f.block_preds(block).iter().enumerate() { + // Assert critical edge condition. + assert_eq!( + f.block_succs(pred).len(), + 1, + "Edge {} -> {} is critical", + pred.index(), + block.index(), + ); + pred_pos[pred.index()] = i; + } + } + } + + CFGInfo { + postorder, + domtree, + insn_block, + vreg_def_inst, + vreg_def_blockparam, + block_entry, + block_exit, + pred_pos, + } + } + + pub fn dominates(&self, a: Block, b: Block) -> bool { + domtree::dominates(&self.domtree[..], a, b) + } + + /// Return the position of this block in its successor's predecessor list. + /// + /// Because the CFG must have split critical edges, we actually do not need + /// to know *which* successor: if there is more than one, then each + /// successor has only one predecessor (that's this block), so the answer is + /// `0` no matter which successor we are considering. 
+ pub fn pred_position(&self, block: Block) -> usize { + self.pred_pos[block.index()] + } +} diff --git a/src/checker.rs b/src/checker.rs new file mode 100644 index 00000000..5cdcb602 --- /dev/null +++ b/src/checker.rs @@ -0,0 +1,615 @@ +/* + * The following code is derived from `lib/src/checker.rs` in the + * regalloc.rs project + * (https://github.com/bytecodealliance/regalloc.rs). regalloc.rs is + * also licensed under Apache-2.0 with the LLVM exception, as the rest + * of regalloc2's non-Ion-derived code is. + */ + +//! Checker: verifies that spills/reloads/moves retain equivalent +//! dataflow to original, VReg-based code. +//! +//! The basic idea is that we track symbolic values as they flow +//! through spills and reloads. The symbolic values represent +//! particular virtual registers in the original function body +//! presented to the register allocator. Any instruction in the +//! original function body (i.e., not added by the allocator) +//! conceptually generates a symbolic value "Vn" when storing to (or +//! modifying) a virtual register. +//! +//! Operand policies (fixed register, register, any) are also checked +//! at each operand. +//! +//! The dataflow analysis state at each program point is: +//! +//! - map of: Allocation -> lattice value (top > Vn symbols (unordered) > bottom) +//! +//! And the transfer functions for instructions are: +//! +//! - `Edit::Move` inserted by RA: [ alloc_d := alloc_s ] +//! +//! A[alloc_d] := A[alloc_s] +//! +//! - phi-node [ V_i := phi block_j:V_j, block_k:V_k, ... ] +//! with allocations [ A_i := phi block_j:A_j, block_k:A_k, ... ] +//! (N.B.: phi-nodes are not semantically present in the final +//! machine code, but we include their allocations so that this +//! checker can work) +//! +//! A[A_i] := meet(A_j, A_k, ...) +//! +//! - statement in pre-regalloc function [ V_i := op V_j, V_k, ... ] +//! with allocated form [ A_i := op A_j, A_k, ... ] +//! +//! A[A_i] := `V_i` +//! +//! In other words, a statement, even after allocation, generates +//! a symbol that corresponds to its original virtual-register +//! def. +//! +//! (N.B.: moves in pre-regalloc function fall into this last case +//! -- they are "just another operation" and generate a new +//! symbol) +//! +//! At control-flow join points, the symbols meet using a very simple +//! lattice meet-function: two different symbols in the same +//! allocation meet to "conflicted"; otherwise, the symbol meets with +//! itself to produce itself (reflexivity). +//! +//! To check correctness, we first find the dataflow fixpoint with the +//! above lattice and transfer/meet functions. Then, at each op, we +//! examine the dataflow solution at the preceding program point, and +//! check that the allocation for each op arg (input/use) contains the +//! symbol corresponding to the original virtual register specified +//! for this arg. + +#![allow(dead_code)] + +use crate::{ + Allocation, AllocationKind, Block, Edit, Function, Inst, InstPosition, Operand, OperandKind, + OperandPolicy, OperandPos, Output, ProgPoint, VReg, +}; + +use std::collections::{HashMap, VecDeque}; +use std::default::Default; +use std::hash::Hash; +use std::result::Result; + +use log::debug; + +/// A set of errors detected by the regalloc checker. +#[derive(Clone, Debug)] +pub struct CheckerErrors { + errors: Vec, +} + +/// A single error detected by the regalloc checker. 
+#[derive(Clone, Debug)] +pub enum CheckerError { + MissingAllocation { + inst: Inst, + op: Operand, + }, + UnknownValueInAllocation { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + ConflictedValueInAllocation { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + IncorrectValueInAllocation { + inst: Inst, + op: Operand, + alloc: Allocation, + actual: VReg, + }, + PolicyViolated { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + AllocationIsNotReg { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + AllocationIsNotFixedReg { + inst: Inst, + op: Operand, + alloc: Allocation, + }, + AllocationIsNotReuse { + inst: Inst, + op: Operand, + alloc: Allocation, + expected_alloc: Allocation, + }, +} + +/// Abstract state for an allocation. +/// +/// Forms a lattice with \top (`Unknown`), \bot (`Conflicted`), and a +/// number of mutually unordered value-points in between, one per real +/// or virtual register. Any two different registers meet to \bot. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum CheckerValue { + /// "top" value: this storage slot has no known value. + Unknown, + /// "bottom" value: this storage slot has a conflicted value. + Conflicted, + /// Reg: this storage slot has a value that originated as a def + /// into the given virtual register. + /// + /// The boolean flag indicates whether the value is + /// reference-typed. + Reg(VReg, bool), +} + +impl Default for CheckerValue { + fn default() -> CheckerValue { + CheckerValue::Unknown + } +} + +impl CheckerValue { + /// Meet function of the abstract-interpretation value lattice. + fn meet(&self, other: &CheckerValue) -> CheckerValue { + match (self, other) { + (&CheckerValue::Unknown, _) => *other, + (_, &CheckerValue::Unknown) => *self, + (&CheckerValue::Conflicted, _) => *self, + (_, &CheckerValue::Conflicted) => *other, + (&CheckerValue::Reg(r1, ref1), &CheckerValue::Reg(r2, ref2)) if r1 == r2 => { + CheckerValue::Reg(r1, ref1 || ref2) + } + _ => { + log::debug!("{:?} and {:?} meet to Conflicted", self, other); + CheckerValue::Conflicted + } + } + } +} + +/// State that steps through program points as we scan over the instruction stream. +#[derive(Clone, Debug, PartialEq, Eq)] +struct CheckerState { + allocations: HashMap, +} + +impl Default for CheckerState { + fn default() -> CheckerState { + CheckerState { + allocations: HashMap::new(), + } + } +} + +impl std::fmt::Display for CheckerValue { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + CheckerValue::Unknown => write!(f, "?"), + CheckerValue::Conflicted => write!(f, "!"), + CheckerValue::Reg(r, _) => write!(f, "{}", r), + } + } +} + +fn merge_map( + into: &mut HashMap, + from: &HashMap, +) { + for (k, v) in from { + let into_v = into.entry(*k).or_insert(Default::default()); + let merged = into_v.meet(v); + *into_v = merged; + } +} + +impl CheckerState { + /// Create a new checker state. + fn new() -> CheckerState { + Default::default() + } + + /// Merge this checker state with another at a CFG join-point. 
+ fn meet_with(&mut self, other: &CheckerState) { + merge_map(&mut self.allocations, &other.allocations); + } + + fn check_val( + &self, + inst: Inst, + op: Operand, + alloc: Allocation, + val: CheckerValue, + allocs: &[Allocation], + ) -> Result<(), CheckerError> { + if alloc == Allocation::none() { + return Err(CheckerError::MissingAllocation { inst, op }); + } + + match val { + CheckerValue::Unknown => { + return Err(CheckerError::UnknownValueInAllocation { inst, op, alloc }); + } + CheckerValue::Conflicted => { + return Err(CheckerError::ConflictedValueInAllocation { inst, op, alloc }); + } + CheckerValue::Reg(r, _) if r != op.vreg() => { + return Err(CheckerError::IncorrectValueInAllocation { + inst, + op, + alloc, + actual: r, + }); + } + _ => {} + } + + self.check_policy(inst, op, alloc, allocs)?; + + Ok(()) + } + + /// Check an instruction against this state. This must be called + /// twice: once with `InstPosition::Before`, and once with + /// `InstPosition::After` (after updating state with defs). + fn check(&self, pos: InstPosition, checkinst: &CheckerInst) -> Result<(), CheckerError> { + match checkinst { + &CheckerInst::Op { + inst, + ref operands, + ref allocs, + .. + } => { + // Skip Use-checks at the After point if there are any + // reused inputs: the Def which reuses the input + // happens early. + let has_reused_input = operands + .iter() + .any(|op| matches!(op.policy(), OperandPolicy::Reuse(_))); + if has_reused_input && pos == InstPosition::After { + return Ok(()); + } + + // For each operand, check (i) that the allocation + // contains the expected vreg, and (ii) that it meets + // the requirements of the OperandPolicy. + for (op, alloc) in operands.iter().zip(allocs.iter()) { + let is_here = match (op.pos(), pos) { + (OperandPos::Before, InstPosition::Before) + | (OperandPos::Both, InstPosition::Before) => true, + (OperandPos::After, InstPosition::After) + | (OperandPos::Both, InstPosition::After) => true, + _ => false, + }; + if !is_here { + continue; + } + if op.kind() == OperandKind::Def { + continue; + } + + let val = self + .allocations + .get(alloc) + .cloned() + .unwrap_or(Default::default()); + debug!( + "checker: checkinst {:?}: op {:?}, alloc {:?}, checker value {:?}", + checkinst, op, alloc, val + ); + self.check_val(inst, *op, *alloc, val, allocs)?; + } + } + _ => {} + } + Ok(()) + } + + /// Update according to instruction. + fn update(&mut self, checkinst: &CheckerInst) { + match checkinst { + &CheckerInst::Move { into, from } => { + let val = self + .allocations + .get(&from) + .cloned() + .unwrap_or(Default::default()); + debug!( + "checker: checkinst {:?} updating: move {:?} -> {:?} val {:?}", + checkinst, from, into, val + ); + self.allocations.insert(into, val); + } + &CheckerInst::Op { + ref operands, + ref allocs, + .. + } => { + for (op, alloc) in operands.iter().zip(allocs.iter()) { + if op.kind() != OperandKind::Def { + continue; + } + self.allocations + .insert(*alloc, CheckerValue::Reg(op.vreg(), false)); + } + } + &CheckerInst::BlockParams { + ref vregs, + ref allocs, + .. 
+ } => { + for (vreg, alloc) in vregs.iter().zip(allocs.iter()) { + self.allocations + .insert(*alloc, CheckerValue::Reg(*vreg, false)); + } + } + } + } + + fn check_policy( + &self, + inst: Inst, + op: Operand, + alloc: Allocation, + allocs: &[Allocation], + ) -> Result<(), CheckerError> { + match op.policy() { + OperandPolicy::Any => {} + OperandPolicy::Reg => { + if alloc.kind() != AllocationKind::Reg { + return Err(CheckerError::AllocationIsNotReg { inst, op, alloc }); + } + } + OperandPolicy::FixedReg(preg) => { + if alloc != Allocation::reg(preg) { + return Err(CheckerError::AllocationIsNotFixedReg { inst, op, alloc }); + } + } + OperandPolicy::Reuse(idx) => { + if alloc.kind() != AllocationKind::Reg { + return Err(CheckerError::AllocationIsNotReg { inst, op, alloc }); + } + if alloc != allocs[idx] { + return Err(CheckerError::AllocationIsNotReuse { + inst, + op, + alloc, + expected_alloc: allocs[idx], + }); + } + } + } + Ok(()) + } +} + +/// An instruction representation in the checker's BB summary. +#[derive(Clone, Debug)] +pub(crate) enum CheckerInst { + /// A move between allocations (these could be registers or + /// spillslots). + Move { into: Allocation, from: Allocation }, + + /// A regular instruction with fixed use and def slots. Contains + /// both the original operands (as given to the regalloc) and the + /// allocation results. + Op { + inst: Inst, + operands: Vec, + allocs: Vec, + }, + + /// The top of a block with blockparams. We define the given vregs + /// into the given allocations. + BlockParams { + block: Block, + vregs: Vec, + allocs: Vec, + }, +} + +#[derive(Debug)] +pub struct Checker<'a, F: Function> { + f: &'a F, + bb_in: HashMap, + bb_insts: HashMap>, +} + +impl<'a, F: Function> Checker<'a, F> { + /// Create a new checker for the given function, initializing CFG + /// info immediately. The client should call the `add_*()` + /// methods to add abstract instructions to each BB before + /// invoking `run()` to check for errors. + pub fn new(f: &'a F) -> Checker<'a, F> { + let mut bb_in = HashMap::new(); + let mut bb_insts = HashMap::new(); + + for block in 0..f.blocks() { + let block = Block::new(block); + bb_in.insert(block, Default::default()); + bb_insts.insert(block, vec![]); + } + + Checker { f, bb_in, bb_insts } + } + + /// Build the list of checker instructions based on the given func + /// and allocation results. + pub fn prepare(&mut self, out: &Output) { + debug!("checker: out = {:?}", out); + // For each original instruction, create an `Op`. + let mut last_inst = None; + let mut insert_idx = 0; + for block in 0..self.f.blocks() { + let block = Block::new(block); + for inst in self.f.block_insns(block).iter() { + assert!(last_inst.is_none() || inst > last_inst.unwrap()); + last_inst = Some(inst); + + // Any inserted edits before instruction. + self.handle_edits(block, out, &mut insert_idx, ProgPoint::before(inst)); + + // Instruction itself. + let operands: Vec<_> = self.f.inst_operands(inst).iter().cloned().collect(); + let allocs: Vec<_> = out.inst_allocs(inst).iter().cloned().collect(); + let checkinst = CheckerInst::Op { + inst, + operands, + allocs, + }; + debug!("checker: adding inst {:?}", checkinst); + self.bb_insts.get_mut(&block).unwrap().push(checkinst); + + // Any inserted edits after instruction. 
+ self.handle_edits(block, out, &mut insert_idx, ProgPoint::after(inst)); + } + } + } + + fn handle_edits(&mut self, block: Block, out: &Output, idx: &mut usize, pos: ProgPoint) { + while *idx < out.edits.len() && out.edits[*idx].0 <= pos { + let &(edit_pos, ref edit) = &out.edits[*idx]; + *idx += 1; + if edit_pos < pos { + continue; + } + debug!("checker: adding edit {:?} at pos {:?}", edit, pos); + match edit { + &Edit::Move { from, to, .. } => { + self.bb_insts + .get_mut(&block) + .unwrap() + .push(CheckerInst::Move { into: to, from }); + } + &Edit::BlockParams { + ref vregs, + ref allocs, + } => { + let inst = CheckerInst::BlockParams { + block, + vregs: vregs.clone(), + allocs: allocs.clone(), + }; + self.bb_insts.get_mut(&block).unwrap().push(inst); + } + } + } + } + + /// Perform the dataflow analysis to compute checker state at each BB entry. + fn analyze(&mut self) { + let mut queue = VecDeque::new(); + queue.push_back(self.f.entry_block()); + + while !queue.is_empty() { + let block = queue.pop_front().unwrap(); + let mut state = self.bb_in.get(&block).cloned().unwrap(); + debug!("analyze: block {} has state {:?}", block.index(), state); + for inst in self.bb_insts.get(&block).unwrap() { + state.update(inst); + debug!("analyze: inst {:?} -> state {:?}", inst, state); + } + + for &succ in self.f.block_succs(block) { + let cur_succ_in = self.bb_in.get(&succ).unwrap(); + let mut new_state = state.clone(); + new_state.meet_with(cur_succ_in); + let changed = &new_state != cur_succ_in; + if changed { + debug!( + "analyze: block {} state changed from {:?} to {:?}; pushing onto queue", + succ.index(), + cur_succ_in, + new_state + ); + self.bb_in.insert(succ, new_state); + queue.push_back(succ); + } + } + } + } + + /// Using BB-start state computed by `analyze()`, step the checker state + /// through each BB and check each instruction's register allocations + /// for errors. + fn find_errors(&self) -> Result<(), CheckerErrors> { + let mut errors = vec![]; + for (block, input) in &self.bb_in { + let mut state = input.clone(); + for inst in self.bb_insts.get(block).unwrap() { + if let Err(e) = state.check(InstPosition::Before, inst) { + debug!("Checker error: {:?}", e); + errors.push(e); + } + state.update(inst); + if let Err(e) = state.check(InstPosition::After, inst) { + debug!("Checker error: {:?}", e); + errors.push(e); + } + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(CheckerErrors { errors }) + } + } + + /// Find any errors, returning `Err(CheckerErrors)` with all errors found + /// or `Ok(())` otherwise. + pub fn run(mut self) -> Result<(), CheckerErrors> { + self.analyze(); + let result = self.find_errors(); + + debug!("=== CHECKER RESULT ==="); + fn print_state(state: &CheckerState) { + let mut s = vec![]; + for (alloc, state) in &state.allocations { + s.push(format!("{} := {}", alloc, state)); + } + debug!(" {{ {} }}", s.join(", ")) + } + for bb in 0..self.f.blocks() { + let bb = Block::new(bb); + debug!("block{}:", bb.index()); + let insts = self.bb_insts.get(&bb).unwrap(); + let mut state = self.bb_in.get(&bb).unwrap().clone(); + print_state(&state); + for inst in insts { + match inst { + &CheckerInst::Op { + inst, + ref operands, + ref allocs, + } => { + debug!(" inst{}: {:?} ({:?})", inst.index(), operands, allocs); + } + &CheckerInst::Move { from, into } => { + debug!(" {} -> {}", from, into); + } + &CheckerInst::BlockParams { + ref vregs, + ref allocs, + .. 
+ } => { + let mut args = vec![]; + for (vreg, alloc) in vregs.iter().zip(allocs.iter()) { + args.push(format!("{}:{}", vreg, alloc)); + } + debug!(" blockparams: {}", args.join(", ")); + } + } + state.update(inst); + print_state(&state); + } + } + + result + } +} diff --git a/src/domtree.rs b/src/domtree.rs new file mode 100644 index 00000000..7677583f --- /dev/null +++ b/src/domtree.rs @@ -0,0 +1,118 @@ +/* + * Derives from the dominator tree implementation in regalloc.rs, which is + * licensed under the Apache Public License 2.0 with LLVM Exception. See: + * https://github.com/bytecodealliance/regalloc.rs + */ + +// This is an implementation of the algorithm described in +// +// A Simple, Fast Dominance Algorithm +// Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy +// Department of Computer Science, Rice University, Houston, Texas, USA +// TR-06-33870 +// https://www.cs.rice.edu/~keith/EMBED/dom.pdf + +use crate::Block; + +// Helper +fn merge_sets( + idom: &[Block], // map from Block to Block + block_to_rpo: &[Option], + mut node1: Block, + mut node2: Block, +) -> Block { + while node1 != node2 { + if node1.is_invalid() || node2.is_invalid() { + return Block::invalid(); + } + let rpo1 = block_to_rpo[node1.index()].unwrap(); + let rpo2 = block_to_rpo[node2.index()].unwrap(); + if rpo1 > rpo2 { + node1 = idom[node1.index()]; + } else if rpo2 > rpo1 { + node2 = idom[node2.index()]; + } + } + assert!(node1 == node2); + node1 +} + +pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>( + num_blocks: usize, + preds: PredFn, + post_ord: &[Block], + start: Block, +) -> Vec { + // We have post_ord, which is the postorder sequence. + + // Compute maps from RPO to block number and vice-versa. + let mut block_to_rpo = vec![None; num_blocks]; + block_to_rpo.resize(num_blocks, None); + for (i, rpo_block) in post_ord.iter().rev().enumerate() { + block_to_rpo[rpo_block.index()] = Some(i as u32); + } + + let mut idom = vec![Block::invalid(); num_blocks]; + + // The start node must have itself as a parent. + idom[start.index()] = start; + + let mut changed = true; + while changed { + changed = false; + // Consider blocks in reverse postorder. Skip any that are unreachable. + for &node in post_ord.iter().rev() { + let rponum = block_to_rpo[node.index()].unwrap(); + + let mut parent = Block::invalid(); + for &pred in preds(node).iter() { + let pred_rpo = match block_to_rpo[pred.index()] { + Some(r) => r, + None => { + // Skip unreachable preds. 
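+ // (A predecessor with no RPO number was never reached from `start`,
+ // so it cannot contribute a dominator.)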
+ continue; + } + }; + if pred_rpo < rponum { + parent = pred; + break; + } + } + + if parent.is_valid() { + for &pred in preds(node).iter() { + if pred == parent { + continue; + } + if idom[pred.index()].is_invalid() { + continue; + } + parent = merge_sets(&idom, &block_to_rpo[..], parent, pred); + } + } + + if parent.is_valid() && parent != idom[node.index()] { + idom[node.index()] = parent; + changed = true; + } + } + } + + idom +} + +pub fn dominates(idom: &[Block], a: Block, mut b: Block) -> bool { + loop { + if a == b { + return true; + } + if b.is_invalid() { + return false; + } + let parent = idom[b.index()]; + if b == parent { + return false; + } + b = idom[b.index()]; + } +} diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs new file mode 100644 index 00000000..ba38e985 --- /dev/null +++ b/src/fuzzing/func.rs @@ -0,0 +1,542 @@ +use crate::{ + domtree, postorder, Allocation, Block, Function, Inst, InstRange, MachineEnv, Operand, + OperandKind, OperandPolicy, OperandPos, PReg, RegClass, VReg, +}; + +use arbitrary::Result as ArbitraryResult; +use arbitrary::{Arbitrary, Unstructured}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum InstOpcode { + Phi, + Op, + Call, + Ret, + Branch, +} + +#[derive(Clone, Debug)] +pub struct InstData { + op: InstOpcode, + operands: Vec, + clobbers: Vec, +} + +impl InstData { + pub fn op(def: usize, uses: &[usize]) -> InstData { + let mut operands = vec![Operand::reg_def(VReg::new(def, RegClass::Int))]; + for &u in uses { + operands.push(Operand::reg_use(VReg::new(u, RegClass::Int))); + } + InstData { + op: InstOpcode::Op, + operands, + clobbers: vec![], + } + } + pub fn branch(uses: &[usize]) -> InstData { + let mut operands = vec![]; + for &u in uses { + operands.push(Operand::reg_use(VReg::new(u, RegClass::Int))); + } + InstData { + op: InstOpcode::Branch, + operands, + clobbers: vec![], + } + } + pub fn ret() -> InstData { + InstData { + op: InstOpcode::Ret, + operands: vec![], + clobbers: vec![], + } + } +} + +#[derive(Clone)] +pub struct Func { + insts: Vec, + blocks: Vec, + block_preds: Vec>, + block_succs: Vec>, + block_params: Vec>, + num_vregs: usize, +} + +impl Function for Func { + fn insts(&self) -> usize { + self.insts.len() + } + + fn blocks(&self) -> usize { + self.blocks.len() + } + + fn entry_block(&self) -> Block { + assert!(self.blocks.len() > 0); + Block::new(0) + } + + fn block_insns(&self, block: Block) -> InstRange { + self.blocks[block.index()] + } + + fn block_succs(&self, block: Block) -> &[Block] { + &self.block_succs[block.index()][..] + } + + fn block_preds(&self, block: Block) -> &[Block] { + &self.block_preds[block.index()][..] + } + + fn block_params(&self, block: Block) -> &[VReg] { + &self.block_params[block.index()][..] + } + + fn is_call(&self, insn: Inst) -> bool { + self.insts[insn.index()].op == InstOpcode::Call + } + + fn is_ret(&self, insn: Inst) -> bool { + self.insts[insn.index()].op == InstOpcode::Ret + } + + fn is_branch(&self, insn: Inst) -> bool { + self.insts[insn.index()].op == InstOpcode::Branch + } + + fn is_safepoint(&self, _: Inst) -> bool { + false + } + + fn is_move(&self, _: Inst) -> Option<(VReg, VReg)> { + None + } + + fn inst_operands(&self, insn: Inst) -> &[Operand] { + &self.insts[insn.index()].operands[..] + } + + fn inst_clobbers(&self, insn: Inst) -> &[PReg] { + &self.insts[insn.index()].clobbers[..] 
+ } + + fn num_vregs(&self) -> usize { + self.num_vregs + } + + fn spillslot_size(&self, regclass: RegClass, _: VReg) -> usize { + match regclass { + RegClass::Int => 1, + RegClass::Float => 2, + } + } +} + +struct FuncBuilder { + postorder: Vec, + idom: Vec, + f: Func, + insts_per_block: Vec>, +} + +impl FuncBuilder { + fn new() -> Self { + FuncBuilder { + postorder: vec![], + idom: vec![], + f: Func { + block_preds: vec![], + block_succs: vec![], + block_params: vec![], + insts: vec![], + blocks: vec![], + num_vregs: 0, + }, + insts_per_block: vec![], + } + } + + pub fn add_block(&mut self) -> Block { + let b = Block::new(self.f.blocks.len()); + self.f + .blocks + .push(InstRange::forward(Inst::new(0), Inst::new(0))); + self.f.block_preds.push(vec![]); + self.f.block_succs.push(vec![]); + self.f.block_params.push(vec![]); + self.insts_per_block.push(vec![]); + b + } + + pub fn add_inst(&mut self, block: Block, data: InstData) { + self.insts_per_block[block.index()].push(data); + } + + pub fn add_edge(&mut self, from: Block, to: Block) { + self.f.block_succs[from.index()].push(to); + self.f.block_preds[to.index()].push(from); + } + + pub fn set_block_params(&mut self, block: Block, params: &[VReg]) { + self.f.block_params[block.index()] = params.iter().cloned().collect(); + } + + fn compute_doms(&mut self) { + self.postorder = postorder::calculate(self.f.blocks.len(), Block::new(0), |block| { + &self.f.block_succs[block.index()][..] + }); + self.idom = domtree::calculate( + self.f.blocks.len(), + |block| &self.f.block_preds[block.index()][..], + &self.postorder[..], + Block::new(0), + ); + } + + fn finalize(mut self) -> Func { + for (blocknum, blockrange) in self.f.blocks.iter_mut().enumerate() { + let begin_inst = self.f.insts.len(); + for inst in &self.insts_per_block[blocknum] { + self.f.insts.push(inst.clone()); + } + let end_inst = self.f.insts.len(); + *blockrange = InstRange::forward(Inst::new(begin_inst), Inst::new(end_inst)); + } + + self.f + } +} + +impl Arbitrary for OperandPolicy { + fn arbitrary(u: &mut Unstructured) -> ArbitraryResult { + Ok(*u.choose(&[OperandPolicy::Any, OperandPolicy::Reg])?) + } +} + +fn choose_dominating_block( + idom: &[Block], + mut block: Block, + allow_self: bool, + u: &mut Unstructured, +) -> ArbitraryResult { + assert!(block.is_valid()); + let orig_block = block; + loop { + if (allow_self || block != orig_block) && bool::arbitrary(u)? { + break; + } + if idom[block.index()] == block { + break; + } + block = idom[block.index()]; + assert!(block.is_valid()); + } + let block = if block != orig_block || allow_self { + block + } else { + Block::invalid() + }; + Ok(block) +} + +#[derive(Clone, Copy, Debug)] +pub struct Options { + pub reused_inputs: bool, + pub fixed_regs: bool, + pub clobbers: bool, + pub control_flow: bool, + pub reducible: bool, + pub block_params: bool, + pub always_local_uses: bool, +} + +impl std::default::Default for Options { + fn default() -> Self { + Options { + reused_inputs: false, + fixed_regs: false, + clobbers: false, + control_flow: true, + reducible: false, + block_params: true, + always_local_uses: false, + } + } +} + +impl Arbitrary for Func { + fn arbitrary(u: &mut Unstructured) -> ArbitraryResult { + Func::arbitrary_with_options(u, &Options::default()) + } +} + +impl Func { + pub fn arbitrary_with_options(u: &mut Unstructured, opts: &Options) -> ArbitraryResult { + // General strategy: + // 1. Create an arbitrary CFG. + // 2. Create a list of vregs to define in each block. + // 3. 
Define some of those vregs in each block as blockparams.f. + // 4. Populate blocks with ops that define the rest of the vregs. + // - For each use, choose an available vreg: either one + // already defined (via blockparam or inst) in this block, + // or one defined in a dominating block. + + let mut builder = FuncBuilder::new(); + for _ in 0..u.int_in_range(1..=100)? { + builder.add_block(); + } + let num_blocks = builder.f.blocks.len(); + + // Generate a CFG. Create a "spine" of either single blocks, + // with links to the next; or fork patterns, with the left + // fork linking to the next and the right fork in `out_blocks` + // to be connected below. This creates an arbitrary CFG with + // split critical edges, which is a property that we require + // for the regalloc. + let mut from = 0; + let mut out_blocks = vec![]; + let mut in_blocks = vec![]; + // For reducibility, if selected: enforce strict nesting of backedges + let mut max_backedge_src = 0; + let mut min_backedge_dest = num_blocks; + while from < num_blocks { + in_blocks.push(from); + if num_blocks > 3 && from < num_blocks - 3 && bool::arbitrary(u)? && opts.control_flow { + // To avoid critical edges, we use from+1 as an edge + // block, and advance `from` an extra block; `from+2` + // will be the next normal iteration. + builder.add_edge(Block::new(from), Block::new(from + 1)); + builder.add_edge(Block::new(from), Block::new(from + 2)); + builder.add_edge(Block::new(from + 2), Block::new(from + 3)); + out_blocks.push(from + 1); + from += 2; + } else if from < num_blocks - 1 { + builder.add_edge(Block::new(from), Block::new(from + 1)); + } + from += 1; + } + for pred in out_blocks { + let mut succ = *u.choose(&in_blocks[..])?; + if opts.reducible && (pred >= succ) { + if pred < max_backedge_src || succ > min_backedge_dest { + // If the chosen edge would result in an + // irreducible CFG, just make this a diamond + // instead. + succ = pred + 2; + } else { + max_backedge_src = pred; + min_backedge_dest = succ; + } + } + builder.add_edge(Block::new(pred), Block::new(succ)); + } + + builder.compute_doms(); + + for block in 0..num_blocks { + builder.f.block_preds[block].clear(); + } + for block in 0..num_blocks { + for &succ in &builder.f.block_succs[block] { + builder.f.block_preds[succ.index()].push(Block::new(block)); + } + } + + builder.compute_doms(); + + let mut vregs_by_block = vec![]; + let mut vregs_by_block_to_be_defined = vec![]; + let mut block_params = vec![vec![]; num_blocks]; + for block in 0..num_blocks { + let mut vregs = vec![]; + for _ in 0..u.int_in_range(5..=15)? { + let vreg = VReg::new(builder.f.num_vregs, RegClass::Int); + builder.f.num_vregs += 1; + vregs.push(vreg); + } + vregs_by_block.push(vregs.clone()); + vregs_by_block_to_be_defined.push(vec![]); + let mut max_block_params = u.int_in_range(0..=std::cmp::min(3, vregs.len() / 3))?; + for &vreg in &vregs { + if block > 0 && opts.block_params && bool::arbitrary(u)? 
&& max_block_params > 0 { + block_params[block].push(vreg); + max_block_params -= 1; + } else { + vregs_by_block_to_be_defined.last_mut().unwrap().push(vreg); + } + } + vregs_by_block_to_be_defined.last_mut().unwrap().reverse(); + builder.set_block_params(Block::new(block), &block_params[block][..]); + } + + for block in 0..num_blocks { + let mut avail = block_params[block].clone(); + let mut remaining_nonlocal_uses = u.int_in_range(0..=3)?; + while let Some(vreg) = vregs_by_block_to_be_defined[block].pop() { + let def_policy = OperandPolicy::arbitrary(u)?; + let def_pos = if bool::arbitrary(u)? { + OperandPos::Before + } else { + OperandPos::After + }; + let mut operands = vec![Operand::new(vreg, def_policy, OperandKind::Def, def_pos)]; + let mut allocations = vec![Allocation::none()]; + for _ in 0..u.int_in_range(0..=3)? { + let vreg = if avail.len() > 0 + && (opts.always_local_uses + || remaining_nonlocal_uses == 0 + || bool::arbitrary(u)?) + { + *u.choose(&avail[..])? + } else if !opts.always_local_uses { + let def_block = choose_dominating_block( + &builder.idom[..], + Block::new(block), + /* allow_self = */ false, + u, + )?; + if !def_block.is_valid() { + // No vregs already defined, and no pred blocks that dominate us + // (perhaps we are the entry block): just stop generating inputs. + break; + } + remaining_nonlocal_uses -= 1; + *u.choose(&vregs_by_block[def_block.index()])? + } else { + break; + }; + let use_policy = OperandPolicy::arbitrary(u)?; + operands.push(Operand::new( + vreg, + use_policy, + OperandKind::Use, + OperandPos::Before, + )); + allocations.push(Allocation::none()); + } + let mut clobbers: Vec = vec![]; + if operands.len() > 1 && opts.reused_inputs && bool::arbitrary(u)? { + // Make the def a reused input. + let op = operands[0]; + assert_eq!(op.kind(), OperandKind::Def); + let reused = u.int_in_range(1..=(operands.len() - 1))?; + operands[0] = Operand::new( + op.vreg(), + OperandPolicy::Reuse(reused), + op.kind(), + OperandPos::After, + ); + } else if opts.fixed_regs && bool::arbitrary(u)? { + // Pick an operand and make it a fixed reg. + let fixed_reg = PReg::new(u.int_in_range(0..=30)?, RegClass::Int); + let i = u.int_in_range(0..=(operands.len() - 1))?; + let op = operands[i]; + operands[i] = Operand::new( + op.vreg(), + OperandPolicy::FixedReg(fixed_reg), + op.kind(), + op.pos(), + ); + } else if opts.clobbers && bool::arbitrary(u)? { + for _ in 0..u.int_in_range(0..=5)? { + let reg = u.int_in_range(0..=30)?; + if clobbers.iter().any(|r| r.hw_enc() == reg) { + break; + } + clobbers.push(PReg::new(reg, RegClass::Int)); + } + } + let op = *u.choose(&[InstOpcode::Op, InstOpcode::Call])?; + builder.add_inst( + Block::new(block), + InstData { + op, + operands, + clobbers, + }, + ); + avail.push(vreg); + } + + // Define the branch with blockparam args that must end + // the block. + if builder.f.block_succs[block].len() > 0 { + let mut args = vec![]; + for &succ in &builder.f.block_succs[block] { + for _ in 0..builder.f.block_params[succ.index()].len() { + let dom_block = choose_dominating_block( + &builder.idom[..], + Block::new(block), + false, + u, + )?; + let vreg = if dom_block.is_valid() && bool::arbitrary(u)? { + u.choose(&vregs_by_block[dom_block.index()][..])? + } else { + u.choose(&avail[..])? 
+ }; + args.push(vreg.vreg()); + } + } + builder.add_inst(Block::new(block), InstData::branch(&args[..])); + } else { + builder.add_inst(Block::new(block), InstData::ret()); + } + } + + Ok(builder.finalize()) + } +} + +impl std::fmt::Debug for Func { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{{\n")?; + for (i, blockrange) in self.blocks.iter().enumerate() { + let succs = self.block_succs[i] + .iter() + .map(|b| b.index()) + .collect::>(); + let preds = self.block_preds[i] + .iter() + .map(|b| b.index()) + .collect::>(); + let params = self.block_params[i] + .iter() + .map(|v| format!("v{}", v.vreg())) + .collect::>() + .join(", "); + write!( + f, + " block{}({}): # succs:{:?} preds:{:?}\n", + i, params, succs, preds + )?; + for inst in blockrange.iter() { + write!( + f, + " inst{}: {:?} ops:{:?} clobber:{:?}\n", + inst.index(), + self.insts[inst.index()].op, + self.insts[inst.index()].operands, + self.insts[inst.index()].clobbers + )?; + } + } + write!(f, "}}\n")?; + Ok(()) + } +} + +pub fn machine_env() -> MachineEnv { + // Reg 31 is the scratch reg. + let regs: Vec = (0..31).map(|i| PReg::new(i, RegClass::Int)).collect(); + let regs_by_class: Vec> = vec![regs.clone(), vec![]]; + let scratch_by_class: Vec = + vec![PReg::new(31, RegClass::Int), PReg::new(0, RegClass::Float)]; + MachineEnv { + regs, + regs_by_class, + scratch_by_class, + } +} diff --git a/src/fuzzing/mod.rs b/src/fuzzing/mod.rs new file mode 100644 index 00000000..8aecdabd --- /dev/null +++ b/src/fuzzing/mod.rs @@ -0,0 +1,3 @@ +//! Utilities for fuzzing. + +pub mod func; diff --git a/src/index.rs b/src/index.rs new file mode 100644 index 00000000..1fe1b604 --- /dev/null +++ b/src/index.rs @@ -0,0 +1,176 @@ +#[macro_export] +macro_rules! define_index { + ($ix:ident) => { + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct $ix(pub u32); + impl $ix { + #[inline(always)] + pub fn new(i: usize) -> Self { + Self(i as u32) + } + #[inline(always)] + pub fn index(self) -> usize { + assert!(self.is_valid()); + self.0 as usize + } + #[inline(always)] + pub fn invalid() -> Self { + Self(u32::MAX) + } + #[inline(always)] + pub fn is_invalid(self) -> bool { + self == Self::invalid() + } + #[inline(always)] + pub fn is_valid(self) -> bool { + self != Self::invalid() + } + #[inline(always)] + pub fn next(self) -> $ix { + assert!(self.is_valid()); + Self(self.0 + 1) + } + #[inline(always)] + pub fn prev(self) -> $ix { + assert!(self.is_valid()); + Self(self.0 - 1) + } + } + + impl crate::index::ContainerIndex for $ix {} + }; +} + +pub trait ContainerIndex: Clone + Copy + std::fmt::Debug + PartialEq + Eq {} + +pub trait ContainerComparator { + type Ix: ContainerIndex; + fn compare(&self, a: Self::Ix, b: Self::Ix) -> std::cmp::Ordering; +} + +define_index!(Inst); +define_index!(Block); + +#[derive(Clone, Copy, Debug)] +pub struct InstRange(Inst, Inst, bool); + +impl InstRange { + #[inline(always)] + pub fn forward(from: Inst, to: Inst) -> Self { + assert!(from.index() <= to.index()); + InstRange(from, to, true) + } + + #[inline(always)] + pub fn backward(from: Inst, to: Inst) -> Self { + assert!(from.index() >= to.index()); + InstRange(to, from, false) + } + + #[inline(always)] + pub fn first(self) -> Inst { + assert!(self.len() > 0); + if self.is_forward() { + self.0 + } else { + self.1.prev() + } + } + + #[inline(always)] + pub fn last(self) -> Inst { + assert!(self.len() > 0); + if self.is_forward() { + self.1.prev() + } else { + self.0 + } + } + + #[inline(always)] + pub 
fn rest(self) -> InstRange { + assert!(self.len() > 0); + if self.is_forward() { + InstRange::forward(self.0.next(), self.1) + } else { + InstRange::backward(self.1.prev(), self.0) + } + } + + #[inline(always)] + pub fn len(self) -> usize { + self.1.index() - self.0.index() + } + + #[inline(always)] + pub fn is_forward(self) -> bool { + self.2 + } + + #[inline(always)] + pub fn rev(self) -> Self { + Self(self.0, self.1, !self.2) + } + + #[inline(always)] + pub fn iter(self) -> InstRangeIter { + InstRangeIter(self) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct InstRangeIter(InstRange); + +impl Iterator for InstRangeIter { + type Item = Inst; + #[inline(always)] + fn next(&mut self) -> Option { + if self.0.len() == 0 { + None + } else { + let ret = self.0.first(); + self.0 = self.0.rest(); + Some(ret) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_inst_range() { + let range = InstRange::forward(Inst::new(0), Inst::new(0)); + assert_eq!(range.len(), 0); + + let range = InstRange::forward(Inst::new(0), Inst::new(5)); + assert_eq!(range.first().index(), 0); + assert_eq!(range.last().index(), 4); + assert_eq!(range.len(), 5); + assert_eq!( + range.iter().collect::>(), + vec![ + Inst::new(0), + Inst::new(1), + Inst::new(2), + Inst::new(3), + Inst::new(4) + ] + ); + let range = range.rev(); + assert_eq!(range.first().index(), 4); + assert_eq!(range.last().index(), 0); + assert_eq!(range.len(), 5); + assert_eq!( + range.iter().collect::>(), + vec![ + Inst::new(4), + Inst::new(3), + Inst::new(2), + Inst::new(1), + Inst::new(0) + ] + ); + } +} diff --git a/src/ion/LICENSE b/src/ion/LICENSE new file mode 100644 index 00000000..14e2f777 --- /dev/null +++ b/src/ion/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. 
"Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. 
You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. 
No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. 
+ +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/src/ion/mod.rs b/src/ion/mod.rs new file mode 100644 index 00000000..78d42dca --- /dev/null +++ b/src/ion/mod.rs @@ -0,0 +1,3763 @@ +/* + * The following license applies to this file, which has been largely + * derived from the files `js/src/jit/BacktrackingAllocator.h` and + * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +//! Backtracking register allocator on SSA code ported from IonMonkey's +//! BacktrackingAllocator. + +/* + * TODO: + * + * - tune heuristics: + * - splits: + * - safepoints? + * - split just before uses with fixed regs and/or just after defs + * with fixed regs? + * - try-any-reg allocate loop should randomly probe in caller-save + * ("preferred") regs first -- have a notion of "preferred regs" in + * MachineEnv? + * - measure average liverange length / number of splits / ... + * + * - reused-input reg: don't allocate register for input that is reused. + * + * - more fuzzing: + * - test with *multiple* fixed-reg constraints on one vreg (same + * inst, different insts) + * + * - modify CL to generate SSA VCode + * - lower blockparams to blockparams directly + * - use temps properly (`alloc_tmp()` vs `alloc_reg()`) + * + * - produce stackmaps + * - stack constraint (also: unify this with stack-args? spillslot vs user stackslot?) + * - vreg reffyness + * - if reffy vreg, add to stackmap lists during reification scan + */ + +#![allow(dead_code, unused_imports)] + +use crate::bitvec::BitVec; +use crate::cfg::CFGInfo; +use crate::index::ContainerComparator; +use crate::moves::ParallelMoves; +use crate::{ + define_index, domtree, Allocation, AllocationKind, Block, Edit, Function, Inst, InstPosition, + MachineEnv, Operand, OperandKind, OperandPolicy, OperandPos, Output, PReg, ProgPoint, + RegAllocError, RegClass, SpillSlot, VReg, +}; +use log::debug; +use smallvec::{smallvec, SmallVec}; +use std::cmp::Ordering; +use std::collections::{BTreeMap, BinaryHeap}; +use std::fmt::Debug; + +#[cfg(not(debug))] +fn validate_ssa(_: &F, _: &CFGInfo) -> Result<(), RegAllocError> { + Ok(()) +} + +#[cfg(debug)] +fn validate_ssa(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllocError> { + crate::validate_ssa(f, cfginfo) +} + +/// A range from `from` (inclusive) to `to` (exclusive). 
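+///
+/// As an illustrative sketch of the half-open semantics (marked `ignore`
+/// since the type and its fields are crate-internal):
+///
+/// ```ignore
+/// let r = CodeRange {
+///     from: ProgPoint::before(Inst::new(2)),
+///     to: ProgPoint::before(Inst::new(5)),
+/// };
+/// assert!(r.contains_point(ProgPoint::after(Inst::new(4))));
+/// assert!(!r.contains_point(ProgPoint::before(Inst::new(5)))); // `to` is exclusive
+/// assert_eq!(r.len(), 3);
+/// ```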
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct CodeRange { + from: ProgPoint, + to: ProgPoint, +} + +impl CodeRange { + pub fn is_empty(&self) -> bool { + self.from == self.to + } + pub fn contains(&self, other: &Self) -> bool { + other.from >= self.from && other.to <= self.to + } + pub fn contains_point(&self, other: ProgPoint) -> bool { + other >= self.from && other < self.to + } + pub fn overlaps(&self, other: &Self) -> bool { + other.to > self.from && other.from < self.to + } + pub fn len(&self) -> usize { + self.to.inst.index() - self.from.inst.index() + } +} + +impl std::cmp::PartialOrd for CodeRange { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl std::cmp::Ord for CodeRange { + fn cmp(&self, other: &Self) -> Ordering { + if self.to <= other.from { + Ordering::Less + } else if self.from >= other.to { + Ordering::Greater + } else { + Ordering::Equal + } + } +} + +define_index!(LiveBundleIndex); +define_index!(LiveRangeIndex); +define_index!(SpillSetIndex); +define_index!(UseIndex); +define_index!(DefIndex); +define_index!(VRegIndex); +define_index!(PRegIndex); +define_index!(SpillSlotIndex); + +type LiveBundleVec = SmallVec<[LiveBundleIndex; 4]>; + +#[derive(Clone, Debug)] +struct LiveRange { + range: CodeRange, + vreg: VRegIndex, + bundle: LiveBundleIndex, + uses_spill_weight: u32, + num_fixed_uses_and_flags: u32, + + first_use: UseIndex, + last_use: UseIndex, + def: DefIndex, + + next_in_bundle: LiveRangeIndex, + next_in_reg: LiveRangeIndex, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u32)] +enum LiveRangeFlag { + Minimal = 1, + Fixed = 2, +} + +impl LiveRange { + #[inline(always)] + pub fn num_fixed_uses(&self) -> u32 { + self.num_fixed_uses_and_flags & ((1 << 24) - 1) + } + #[inline(always)] + pub fn set_num_fixed_uses(&mut self, count: u32) { + debug_assert!(count < (1 << 24)); + self.num_fixed_uses_and_flags = (self.num_fixed_uses_and_flags & !((1 << 24) - 1)) | count; + } + #[inline(always)] + pub fn inc_num_fixed_uses(&mut self) { + debug_assert!(self.num_fixed_uses_and_flags & ((1 << 24) - 1) < ((1 << 24) - 1)); + self.num_fixed_uses_and_flags += 1; + } + #[inline(always)] + pub fn dec_num_fixed_uses(&mut self) { + debug_assert!(self.num_fixed_uses_and_flags & ((1 << 24) - 1) > 0); + self.num_fixed_uses_and_flags -= 1; + } + #[inline(always)] + pub fn set_flag(&mut self, flag: LiveRangeFlag) { + self.num_fixed_uses_and_flags |= (flag as u32) << 24; + } + #[inline(always)] + pub fn clear_flag(&mut self, flag: LiveRangeFlag) { + self.num_fixed_uses_and_flags &= !((flag as u32) << 24); + } + #[inline(always)] + pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { + self.num_fixed_uses_and_flags & ((flag as u32) << 24) != 0 + } +} + +#[derive(Clone, Debug)] +struct Use { + operand: Operand, + pos: ProgPoint, + slot: usize, + next_use: UseIndex, +} + +#[derive(Clone, Debug)] +struct Def { + operand: Operand, + pos: ProgPoint, + slot: usize, +} + +#[derive(Clone, Debug)] +struct LiveBundle { + first_range: LiveRangeIndex, + last_range: LiveRangeIndex, + spillset: SpillSetIndex, + allocation: Allocation, + prio: u32, // recomputed after every bulk update + spill_weight_and_props: u32, +} + +impl LiveBundle { + #[inline(always)] + fn set_cached_spill_weight_and_props(&mut self, spill_weight: u32, minimal: bool, fixed: bool) { + debug_assert!(spill_weight < ((1 << 30) - 1)); + self.spill_weight_and_props = + spill_weight | (if minimal { 1 << 31 } else { 0 }) | (if fixed { 1 << 30 } else { 0 }); + } + + 
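+ // Packed layout written by `set_cached_spill_weight_and_props`: the spill
+ // weight occupies bits 0..=29, bit 30 records "fixed", and bit 31 records
+ // "minimal". For example (an arbitrary illustrative value, not taken from a
+ // real run), a call with (2000, minimal = false, fixed = true) stores
+ // (1 << 30) | 2000 = 0x4000_07d0.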
#[inline(always)] + fn cached_minimal(&self) -> bool { + self.spill_weight_and_props & (1 << 31) != 0 + } + + #[inline(always)] + fn cached_fixed(&self) -> bool { + self.spill_weight_and_props & (1 << 30) != 0 + } + + #[inline(always)] + fn cached_spill_weight(&self) -> u32 { + self.spill_weight_and_props & !((1 << 30) - 1) + } +} + +#[derive(Clone, Debug)] +struct SpillSet { + bundles: LiveBundleVec, + size: u32, + class: RegClass, + slot: SpillSlotIndex, + reg_hint: Option, +} + +#[derive(Clone, Debug)] +struct VRegData { + reg: VReg, + def: DefIndex, + blockparam: Block, + first_range: LiveRangeIndex, +} + +#[derive(Clone, Debug)] +struct PRegData { + reg: PReg, + allocations: LiveRangeSet, +} + +/* + * Environment setup: + * + * We have seven fundamental objects: LiveRange, LiveBundle, SpillSet, Use, Def, VReg, PReg. + * + * The relationship is as follows: + * + * LiveRange --(vreg)--> shared(VReg) + * LiveRange --(bundle)--> shared(LiveBundle) + * LiveRange --(def)--> owns(Def) + * LiveRange --(use) --> list(Use) + * + * Use --(vreg)--> shared(VReg) + * + * Def --(vreg) --> owns(VReg) + * + * LiveBundle --(range)--> list(LiveRange) + * LiveBundle --(spillset)--> shared(SpillSet) + * LiveBundle --(parent)--> parent(LiveBundle) + * + * SpillSet --(parent)--> parent(SpillSet) + * SpillSet --(bundles)--> list(LiveBundle) + * + * VReg --(range)--> list(LiveRange) + * + * PReg --(ranges)--> set(LiveRange) + */ + +#[derive(Clone, Debug)] +struct Env<'a, F: Function> { + func: &'a F, + env: &'a MachineEnv, + cfginfo: CFGInfo, + liveins: Vec, + /// Blockparam outputs: from-vreg, (end of) from-block, (start of) + /// to-block, to-vreg. The field order is significant: these are sorted so + /// that a scan over vregs, then blocks in each range, can scan in + /// order through this (sorted) list and add allocs to the + /// half-move list. + blockparam_outs: Vec<(VRegIndex, Block, Block, VRegIndex)>, + /// Blockparam inputs: to-vreg, (start of) to-block, (end of) + /// from-block. As above for `blockparam_outs`, field order is + /// significant. + blockparam_ins: Vec<(VRegIndex, Block, Block)>, + /// Blockparam allocs: block, idx, vreg, alloc. Info to describe + /// blockparam locations at block entry, for metadata purposes + /// (e.g. for the checker). + blockparam_allocs: Vec<(Block, u32, VRegIndex, Allocation)>, + + ranges: Vec, + bundles: Vec, + spillsets: Vec, + uses: Vec, + defs: Vec, + vregs: Vec, + pregs: Vec, + allocation_queue: PrioQueue, + hot_code: LiveRangeSet, + clobbers: Vec, // Sorted list of insts with clobbers. + + spilled_bundles: Vec, + spillslots: Vec, + slots_by_size: Vec, + + // When multiple fixed-register constraints are present on a + // single VReg at a single program point (this can happen for, + // e.g., call args that use the same value multiple times), we + // remove all but one of the fixed-register constraints, make a + // note here, and add a clobber with that PReg instread to keep + // the register available. When we produce the final edit-list, we + // will insert a copy from wherever the VReg's primary allocation + // was to the approprate PReg. + // + // (progpoint, copy-from-preg, copy-to-preg) + multi_fixed_reg_fixups: Vec<(ProgPoint, PRegIndex, PRegIndex)>, + + inserted_moves: Vec, + + // Output: + edits: Vec<(u32, InsertMovePrio, Edit)>, + allocs: Vec, + inst_alloc_offsets: Vec, + num_spillslots: u32, + + stats: Stats, + + // For debug output only: a list of textual annotations at every + // ProgPoint to insert into the final allocated program listing. 
+ debug_annotations: std::collections::HashMap>, +} + +#[derive(Clone, Debug)] +struct SpillSlotData { + ranges: LiveRangeSet, + class: RegClass, + size: u32, + alloc: Allocation, + next_spillslot: SpillSlotIndex, +} + +#[derive(Clone, Debug)] +struct SpillSlotList { + first_spillslot: SpillSlotIndex, + last_spillslot: SpillSlotIndex, +} + +#[derive(Clone, Debug)] +struct PrioQueue { + heap: std::collections::BinaryHeap, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +struct PrioQueueEntry { + prio: u32, + bundle: LiveBundleIndex, +} + +#[derive(Clone, Debug)] +struct LiveRangeSet { + btree: BTreeMap, +} + +#[derive(Clone, Copy, Debug)] +struct LiveRangeKey { + from: u32, + to: u32, +} + +impl LiveRangeKey { + fn from_range(range: &CodeRange) -> Self { + Self { + from: range.from.to_index(), + to: range.to.to_index(), + } + } +} + +impl std::cmp::PartialEq for LiveRangeKey { + fn eq(&self, other: &Self) -> bool { + self.to > other.from && self.from < other.to + } +} +impl std::cmp::Eq for LiveRangeKey {} +impl std::cmp::PartialOrd for LiveRangeKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl std::cmp::Ord for LiveRangeKey { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + if self.to <= other.from { + std::cmp::Ordering::Less + } else if self.from >= other.to { + std::cmp::Ordering::Greater + } else { + std::cmp::Ordering::Equal + } + } +} + +struct PrioQueueComparator<'a> { + prios: &'a [usize], +} +impl<'a> ContainerComparator for PrioQueueComparator<'a> { + type Ix = LiveBundleIndex; + fn compare(&self, a: Self::Ix, b: Self::Ix) -> std::cmp::Ordering { + self.prios[a.index()].cmp(&self.prios[b.index()]) + } +} + +impl PrioQueue { + fn new() -> Self { + PrioQueue { + heap: std::collections::BinaryHeap::new(), + } + } + + fn insert(&mut self, bundle: LiveBundleIndex, prio: usize) { + self.heap.push(PrioQueueEntry { + prio: prio as u32, + bundle, + }); + } + + fn is_empty(self) -> bool { + self.heap.is_empty() + } + + fn pop(&mut self) -> Option { + self.heap.pop().map(|entry| entry.bundle) + } +} + +impl LiveRangeSet { + pub(crate) fn new() -> Self { + Self { + btree: BTreeMap::new(), + } + } +} + +fn spill_weight_from_policy(policy: OperandPolicy) -> u32 { + match policy { + OperandPolicy::Any => 1000, + OperandPolicy::Reg | OperandPolicy::FixedReg(_) => 2000, + _ => 0, + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum Requirement { + Fixed(PReg), + Register(RegClass), + Any(RegClass), +} +impl Requirement { + fn class(self) -> RegClass { + match self { + Requirement::Fixed(preg) => preg.class(), + Requirement::Register(class) | Requirement::Any(class) => class, + } + } + + fn merge(self, other: Requirement) -> Option { + if self.class() != other.class() { + return None; + } + match (self, other) { + (other, Requirement::Any(_)) | (Requirement::Any(_), other) => Some(other), + (Requirement::Register(_), Requirement::Fixed(preg)) + | (Requirement::Fixed(preg), Requirement::Register(_)) => { + Some(Requirement::Fixed(preg)) + } + (Requirement::Register(_), Requirement::Register(_)) => Some(self), + (Requirement::Fixed(a), Requirement::Fixed(b)) if a == b => Some(self), + _ => None, + } + } + fn from_operand(op: Operand) -> Requirement { + match op.policy() { + OperandPolicy::FixedReg(preg) => Requirement::Fixed(preg), + OperandPolicy::Reg | OperandPolicy::Reuse(_) => Requirement::Register(op.class()), + _ => Requirement::Any(op.class()), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum 
AllocRegResult { + Allocated(Allocation), + Conflict(LiveBundleVec), + ConflictWithFixed, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct BundleProperties { + minimal: bool, + fixed: bool, +} + +#[derive(Clone, Debug)] +struct InsertedMove { + pos: ProgPoint, + prio: InsertMovePrio, + from_alloc: Allocation, + to_alloc: Allocation, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum InsertMovePrio { + InEdgeMoves, + BlockParam, + Regular, + MultiFixedReg, + ReusedInput, + OutEdgeMoves, +} + +#[derive(Clone, Copy, Debug, Default)] +pub struct Stats { + initial_liverange_count: usize, + merged_bundle_count: usize, + process_bundle_count: usize, + process_bundle_reg_probes_fixed: usize, + process_bundle_reg_success_fixed: usize, + process_bundle_reg_probes_any: usize, + process_bundle_reg_success_any: usize, + evict_bundle_event: usize, + evict_bundle_count: usize, + splits: usize, + splits_clobbers: usize, + splits_hot: usize, + splits_conflicts: usize, + splits_all: usize, + final_liverange_count: usize, + final_bundle_count: usize, + spill_bundle_count: usize, + spill_bundle_reg_probes: usize, + spill_bundle_reg_success: usize, + blockparam_ins_count: usize, + blockparam_outs_count: usize, + blockparam_allocs_count: usize, + halfmoves_count: usize, + edits_count: usize, +} + +impl<'a, F: Function> Env<'a, F> { + pub(crate) fn new(func: &'a F, env: &'a MachineEnv, cfginfo: CFGInfo) -> Self { + Self { + func, + env, + cfginfo, + + liveins: vec![], + blockparam_outs: vec![], + blockparam_ins: vec![], + blockparam_allocs: vec![], + bundles: vec![], + ranges: vec![], + spillsets: vec![], + uses: vec![], + defs: vec![], + vregs: vec![], + pregs: vec![], + allocation_queue: PrioQueue::new(), + clobbers: vec![], + hot_code: LiveRangeSet::new(), + spilled_bundles: vec![], + spillslots: vec![], + slots_by_size: vec![], + + multi_fixed_reg_fixups: vec![], + inserted_moves: vec![], + edits: vec![], + allocs: vec![], + inst_alloc_offsets: vec![], + num_spillslots: 0, + + stats: Stats::default(), + + debug_annotations: std::collections::HashMap::new(), + } + } + + fn create_pregs_and_vregs(&mut self) { + // Create RRegs from the RealRegUniverse. + for &preg in &self.env.regs { + self.pregs.push(PRegData { + reg: preg, + allocations: LiveRangeSet::new(), + }); + } + // Create VRegs from the vreg count. + for idx in 0..self.func.num_vregs() { + // We'll fill in the real details when we see the def. + let reg = VReg::new(idx, RegClass::Int); + self.add_vreg(VRegData { + reg, + def: DefIndex::invalid(), + first_range: LiveRangeIndex::invalid(), + blockparam: Block::invalid(), + }); + } + // Create allocations too. 
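+ // One `Allocation::none()` slot is reserved per operand; the slots for
+ // instruction `i` begin at `inst_alloc_offsets[i]` within `allocs`.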
+ for inst in 0..self.func.insts() { + let start = self.allocs.len() as u32; + self.inst_alloc_offsets.push(start); + for _ in 0..self.func.inst_operands(Inst::new(inst)).len() { + self.allocs.push(Allocation::none()); + } + } + } + + fn add_vreg(&mut self, data: VRegData) -> VRegIndex { + let idx = self.vregs.len(); + self.vregs.push(data); + VRegIndex::new(idx) + } + + fn create_liverange(&mut self, range: CodeRange) -> LiveRangeIndex { + let idx = self.ranges.len(); + self.ranges.push(LiveRange { + range, + vreg: VRegIndex::invalid(), + bundle: LiveBundleIndex::invalid(), + uses_spill_weight: 0, + num_fixed_uses_and_flags: 0, + first_use: UseIndex::invalid(), + last_use: UseIndex::invalid(), + def: DefIndex::invalid(), + next_in_bundle: LiveRangeIndex::invalid(), + next_in_reg: LiveRangeIndex::invalid(), + }); + LiveRangeIndex::new(idx) + } + + /// Mark `range` as live for the given `vreg`. `num_ranges` is used to prevent + /// excessive coalescing on pathological inputs. + /// + /// Returns the liverange that contains the given range. + fn add_liverange_to_vreg( + &mut self, + vreg: VRegIndex, + range: CodeRange, + num_ranges: &mut usize, + ) -> LiveRangeIndex { + log::debug!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); + const COALESCE_LIMIT: usize = 100_000; + + // Look for a single or contiguous sequence of existing live ranges that overlap with the + // given range. + + let mut insert_after = LiveRangeIndex::invalid(); + let mut merged = LiveRangeIndex::invalid(); + let mut iter = self.vregs[vreg.index()].first_range; + let mut prev = LiveRangeIndex::invalid(); + while iter.is_valid() { + let existing = &mut self.ranges[iter.index()]; + log::debug!(" -> existing range: {:?}", existing); + if range.from >= existing.range.to && *num_ranges < COALESCE_LIMIT { + // New range comes fully after this one -- record it as a lower bound. + insert_after = iter; + prev = iter; + iter = existing.next_in_reg; + log::debug!(" -> lower bound"); + continue; + } + if range.to <= existing.range.from { + // New range comes fully before this one -- we're found our spot. + log::debug!(" -> upper bound (break search loop)"); + break; + } + // If we're here, then we overlap with at least one endpoint of the range. + log::debug!(" -> must overlap"); + debug_assert!(range.overlaps(&existing.range)); + if merged.is_invalid() { + // This is the first overlapping range. Extend to simply cover the new range. + merged = iter; + if range.from < existing.range.from { + existing.range.from = range.from; + } + if range.to > existing.range.to { + existing.range.to = range.to; + } + log::debug!( + " -> extended range of existing range to {:?}", + existing.range + ); + // Continue; there may be more ranges to merge with. + prev = iter; + iter = existing.next_in_reg; + continue; + } + // We overlap but we've already extended the first overlapping existing liverange, so + // we need to do a true merge instead. + log::debug!(" -> merging {:?} into {:?}", iter, merged); + log::debug!( + " -> before: merged {:?}: {:?}", + merged, + self.ranges[merged.index()] + ); + debug_assert!( + self.ranges[iter.index()].range.from >= self.ranges[merged.index()].range.from + ); // Because we see LRs in order. 
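+ // Fold `iter` into `merged`: grow the merged range's endpoint, take over
+ // `iter`'s def if it has one, move its uses across, and then unlink `iter`
+ // from this vreg's range list below.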
+ if self.ranges[iter.index()].range.to > self.ranges[merged.index()].range.to { + self.ranges[merged.index()].range.to = self.ranges[iter.index()].range.to; + } + if self.ranges[iter.index()].def.is_valid() { + self.ranges[merged.index()].def = self.ranges[iter.index()].def; + } + self.distribute_liverange_uses(vreg, iter, merged); + log::debug!( + " -> after: merged {:?}: {:?}", + merged, + self.ranges[merged.index()] + ); + + // Remove from list of liveranges for this vreg. + let next = self.ranges[iter.index()].next_in_reg; + if prev.is_valid() { + self.ranges[prev.index()].next_in_reg = next; + } else { + self.vregs[vreg.index()].first_range = next; + } + // `prev` remains the same (we deleted current range). + iter = next; + } + + // If we get here and did not merge into an existing liverange or liveranges, then we need + // to create a new one. + if merged.is_invalid() { + let lr = self.create_liverange(range); + self.ranges[lr.index()].vreg = vreg; + if insert_after.is_valid() { + let next = self.ranges[insert_after.index()].next_in_reg; + self.ranges[lr.index()].next_in_reg = next; + self.ranges[insert_after.index()].next_in_reg = lr; + } else { + self.ranges[lr.index()].next_in_reg = self.vregs[vreg.index()].first_range; + self.vregs[vreg.index()].first_range = lr; + } + *num_ranges += 1; + lr + } else { + merged + } + } + + fn distribute_liverange_uses( + &mut self, + vreg: VRegIndex, + from: LiveRangeIndex, + into: LiveRangeIndex, + ) { + log::debug!("distribute from {:?} to {:?}", from, into); + assert_eq!( + self.ranges[from.index()].vreg, + self.ranges[into.index()].vreg + ); + let from_range = self.ranges[from.index()].range; + let into_range = self.ranges[into.index()].range; + // For every use in `from`... + let mut prev = UseIndex::invalid(); + let mut iter = self.ranges[from.index()].first_use; + while iter.is_valid() { + let usedata = &mut self.uses[iter.index()]; + // If we have already passed `into`, we're done. + if usedata.pos >= into_range.to { + break; + } + // If this use is within the range of `into`, move it over. + if into_range.contains_point(usedata.pos) { + log::debug!(" -> moving {:?}", iter); + let next = usedata.next_use; + if prev.is_valid() { + self.uses[prev.index()].next_use = next; + } else { + self.ranges[from.index()].first_use = next; + } + if iter == self.ranges[from.index()].last_use { + self.ranges[from.index()].last_use = prev; + } + // `prev` remains the same. + self.update_liverange_stats_on_remove_use(from, iter); + // This may look inefficient but because we are always merging + // non-overlapping LiveRanges, all uses will be at the beginning + // or end of the existing use-list; both cases are optimized. + self.insert_use_into_liverange_and_update_stats(into, iter); + iter = next; + } else { + prev = iter; + iter = usedata.next_use; + } + } + + // Distribute def too if `from` has a def and the def is in range of `into_range`. 
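+ // (The DefIndex itself is looked up via the owning vreg; it is attached to
+ // `into` only when its position falls inside `from`'s original range.)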
+ if self.ranges[from.index()].def.is_valid() { + let def_idx = self.vregs[vreg.index()].def; + if from_range.contains_point(self.defs[def_idx.index()].pos) { + self.ranges[into.index()].def = def_idx; + } + } + } + + fn update_liverange_stats_on_remove_use(&mut self, from: LiveRangeIndex, u: UseIndex) { + log::debug!("remove use {:?} from lr {:?}", u, from); + debug_assert!(u.is_valid()); + let usedata = &self.uses[u.index()]; + let lrdata = &mut self.ranges[from.index()]; + if let OperandPolicy::FixedReg(_) = usedata.operand.policy() { + lrdata.dec_num_fixed_uses(); + } + log::debug!( + " -> subtract {} from uses_spill_weight {}; now {}", + spill_weight_from_policy(usedata.operand.policy()), + lrdata.uses_spill_weight, + lrdata.uses_spill_weight - spill_weight_from_policy(usedata.operand.policy()), + ); + + lrdata.uses_spill_weight -= spill_weight_from_policy(usedata.operand.policy()); + } + + fn insert_use_into_liverange_and_update_stats(&mut self, into: LiveRangeIndex, u: UseIndex) { + let insert_pos = self.uses[u.index()].pos; + let first = self.ranges[into.index()].first_use; + self.uses[u.index()].next_use = UseIndex::invalid(); + if first.is_invalid() { + // Empty list. + self.ranges[into.index()].first_use = u; + self.ranges[into.index()].last_use = u; + } else if insert_pos > self.uses[self.ranges[into.index()].last_use.index()].pos { + // After tail. + let tail = self.ranges[into.index()].last_use; + self.uses[tail.index()].next_use = u; + self.ranges[into.index()].last_use = u; + } else { + // Otherwise, scan linearly to find insertion position. + let mut prev = UseIndex::invalid(); + let mut iter = first; + while iter.is_valid() { + if self.uses[iter.index()].pos > insert_pos { + break; + } + prev = iter; + iter = self.uses[iter.index()].next_use; + } + self.uses[u.index()].next_use = iter; + if prev.is_valid() { + self.uses[prev.index()].next_use = u; + } else { + self.ranges[into.index()].first_use = u; + } + if iter.is_invalid() { + self.ranges[into.index()].last_use = u; + } + } + + // Update stats. + let policy = self.uses[u.index()].operand.policy(); + if let OperandPolicy::FixedReg(_) = policy { + self.ranges[into.index()].inc_num_fixed_uses(); + } + log::debug!( + "insert use {:?} into lr {:?} with weight {}", + u, + into, + spill_weight_from_policy(policy) + ); + self.ranges[into.index()].uses_spill_weight += spill_weight_from_policy(policy); + log::debug!(" -> now {}", self.ranges[into.index()].uses_spill_weight); + } + + fn find_vreg_liverange_for_pos( + &self, + vreg: VRegIndex, + pos: ProgPoint, + ) -> Option { + let mut range = self.vregs[vreg.index()].first_range; + while range.is_valid() { + if self.ranges[range.index()].range.contains_point(pos) { + return Some(range); + } + range = self.ranges[range.index()].next_in_reg; + } + None + } + + fn add_liverange_to_preg(&mut self, range: CodeRange, reg: PReg) { + let preg_idx = PRegIndex::new(reg.index()); + let lr = self.create_liverange(range); + self.pregs[preg_idx.index()] + .allocations + .btree + .insert(LiveRangeKey::from_range(&range), lr); + } + + fn compute_liveness(&mut self) { + // Create initial LiveIn bitsets. + for _ in 0..self.func.blocks() { + self.liveins.push(BitVec::new()); + } + + let num_vregs = self.func.num_vregs(); + + let mut num_ranges = 0; + + // Create Uses and Defs referring to VRegs, and place the Uses + // in LiveRanges. + // + // We iterate backward, so as long as blocks are well-ordered + // (in RPO), we see uses before defs. 
+ // + // Because of this, we can construct live ranges in one pass, + // i.e., considering each block once, propagating live + // registers backward across edges to a bitset at each block + // exit point, gen'ing at uses, kill'ing at defs, and meeting + // with a union. + let mut block_to_postorder: SmallVec<[Option; 16]> = + smallvec![None; self.func.blocks()]; + for i in 0..self.cfginfo.postorder.len() { + let block = self.cfginfo.postorder[i]; + block_to_postorder[block.index()] = Some(i as u32); + } + + // Track current LiveRange for each vreg. + let mut vreg_ranges: Vec = + vec![LiveRangeIndex::invalid(); self.func.num_vregs()]; + + for i in 0..self.cfginfo.postorder.len() { + // (avoid borrowing `self`) + let block = self.cfginfo.postorder[i]; + block_to_postorder[block.index()] = Some(i as u32); + + // Init live-set to union of liveins from successors + // (excluding backedges; those are handled below). + let mut live = BitVec::with_capacity(num_vregs); + for &succ in self.func.block_succs(block) { + live.or(&self.liveins[succ.index()]); + } + + // Initially, registers are assumed live for the whole block. + for vreg in live.iter() { + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: self.cfginfo.block_exit[block.index()].next(), + }; + log::debug!( + "vreg {:?} live at end of block --> create range {:?}", + VRegIndex::new(vreg), + range + ); + let lr = self.add_liverange_to_vreg(VRegIndex::new(vreg), range, &mut num_ranges); + vreg_ranges[vreg] = lr; + } + + // Create vreg data for blockparams. + for param in self.func.block_params(block) { + self.vregs[param.vreg()].reg = *param; + self.vregs[param.vreg()].blockparam = block; + } + + let insns = self.func.block_insns(block); + + // If the last instruction is a branch (rather than + // return), create blockparam_out entries. + if self.func.is_branch(insns.last()) { + let operands = self.func.inst_operands(insns.last()); + let mut i = 0; + for &succ in self.func.block_succs(block) { + for &blockparam in self.func.block_params(succ) { + let from_vreg = VRegIndex::new(operands[i].vreg().vreg()); + let blockparam_vreg = VRegIndex::new(blockparam.vreg()); + self.blockparam_outs + .push((from_vreg, block, succ, blockparam_vreg)); + i += 1; + } + } + } + + // For each instruction, in reverse order, process + // operands and clobbers. + for inst in insns.rev().iter() { + if self.func.inst_clobbers(inst).len() > 0 { + self.clobbers.push(inst); + } + // Mark clobbers with CodeRanges on PRegs. + for i in 0..self.func.inst_clobbers(inst).len() { + // don't borrow `self` + let clobber = self.func.inst_clobbers(inst)[i]; + let range = CodeRange { + from: ProgPoint::before(inst), + to: ProgPoint::before(inst.next()), + }; + self.add_liverange_to_preg(range, clobber); + } + + // Does the instruction have any input-reusing + // outputs? This is important below to establish + // proper interference wrt other inputs. + let mut reused_input = None; + for op in self.func.inst_operands(inst) { + if let OperandPolicy::Reuse(i) = op.policy() { + reused_input = Some(i); + break; + } + } + + // Process defs and uses. + for i in 0..self.func.inst_operands(inst).len() { + // don't borrow `self` + let operand = self.func.inst_operands(inst)[i]; + match operand.kind() { + OperandKind::Def => { + // Create the Def object. 
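+                            // (A def at the Before/Both position is anchored at the
+                            // instruction's before-point; a def at After is anchored at
+                            // its after-point.)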
+ let pos = match operand.pos() { + OperandPos::Before | OperandPos::Both => ProgPoint::before(inst), + OperandPos::After => ProgPoint::after(inst), + }; + let def = DefIndex(self.defs.len() as u32); + self.defs.push(Def { + operand, + pos, + slot: i, + }); + + log::debug!("Def of {} at {:?}", operand.vreg(), pos); + + // Fill in vreg's actual data. + debug_assert!(self.vregs[operand.vreg().vreg()].def.is_invalid()); + self.vregs[operand.vreg().vreg()].reg = operand.vreg(); + self.vregs[operand.vreg().vreg()].def = def; + + // Trim the range for this vreg to start + // at `pos` if it previously ended at the + // start of this block (i.e. was not + // merged into some larger LiveRange due + // to out-of-order blocks). + let mut lr = vreg_ranges[operand.vreg().vreg()]; + log::debug!(" -> has existing LR {:?}", lr); + // If there was no liverange (dead def), create a trivial one. + if lr.is_invalid() { + lr = self.add_liverange_to_vreg( + VRegIndex::new(operand.vreg().vreg()), + CodeRange { + from: pos, + to: pos.next(), + }, + &mut num_ranges, + ); + log::debug!(" -> invalid; created {:?}", lr); + } + if self.ranges[lr.index()].range.from + == self.cfginfo.block_entry[block.index()] + { + log::debug!(" -> started at block start; trimming to {:?}", pos); + self.ranges[lr.index()].range.from = pos; + } + // Note that the liverange contains a def. + self.ranges[lr.index()].def = def; + // Remove from live-set. + live.set(operand.vreg().vreg(), false); + vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); + } + OperandKind::Use => { + // Establish where the use occurs. + let mut pos = match operand.pos() { + OperandPos::Before => ProgPoint::before(inst), + OperandPos::Both | OperandPos::After => ProgPoint::after(inst), + }; + // If there are any reused inputs in this + // instruction, and this is *not* the + // reused input, force `pos` to + // `After`. (See note below for why; it's + // very subtle!) + if reused_input.is_some() && reused_input.unwrap() != i { + pos = ProgPoint::after(inst); + } + // If this is a branch, extend `pos` to + // the end of the block. (Branch uses are + // blockparams and need to be live at the + // end of the block. + if self.func.is_branch(inst) { + pos = self.cfginfo.block_exit[block.index()]; + } + + // Create the actual use object. + let u = UseIndex(self.uses.len() as u32); + self.uses.push(Use { + operand, + pos, + slot: i, + next_use: UseIndex::invalid(), + }); + + // Create/extend the LiveRange and add the use to the range. + let range = CodeRange { + from: self.cfginfo.block_entry[block.index()], + to: pos.next(), + }; + let lr = self.add_liverange_to_vreg( + VRegIndex::new(operand.vreg().vreg()), + range, + &mut num_ranges, + ); + vreg_ranges[operand.vreg().vreg()] = lr; + + log::debug!("Use of {:?} at {:?} -> {:?} -> {:?}", operand, pos, u, lr); + + self.insert_use_into_liverange_and_update_stats(lr, u); + + // Add to live-set. + live.set(operand.vreg().vreg(), true); + } + } + } + } + + // Block parameters define vregs at the very beginning of + // the block. Remove their live vregs from the live set + // here. + for vreg in self.func.block_params(block) { + if live.get(vreg.vreg()) { + live.set(vreg.vreg(), false); + } else { + // Create trivial liverange if blockparam is dead. + let start = self.cfginfo.block_entry[block.index()]; + self.add_liverange_to_vreg( + VRegIndex::new(vreg.vreg()), + CodeRange { + from: start, + to: start.next(), + }, + &mut num_ranges, + ); + } + // add `blockparam_ins` entries. 
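+                // (One (vreg, block, pred) entry per predecessor edge, so that the
+                // incoming value can be routed onto each in-edge later.)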
+ let vreg_idx = VRegIndex::new(vreg.vreg()); + for &pred in self.func.block_preds(block) { + self.blockparam_ins.push((vreg_idx, block, pred)); + } + } + + // Loop-handling: to handle backedges, rather than running + // a fixpoint loop, we add a live-range for every value + // live at the beginning of the loop over the whole loop + // body. + // + // To determine what the "loop body" consists of, we find + // the transitively minimum-reachable traversal index in + // our traversal order before the current block + // index. When we discover a backedge, *all* block indices + // within the traversal range are considered part of the + // loop body. This is guaranteed correct (though perhaps + // an overapproximation) even for irreducible control + // flow, because it will find all blocks to which the + // liveness could flow backward over which we've already + // scanned, and it should give good results for reducible + // control flow with properly ordered blocks. + let mut min_pred = i; + let mut loop_scan = i; + log::debug!( + "looking for loops from postorder#{} (block{})", + i, + self.cfginfo.postorder[i].index() + ); + while loop_scan >= min_pred { + let block = self.cfginfo.postorder[loop_scan]; + log::debug!( + " -> scan at postorder#{} (block{})", + loop_scan, + block.index() + ); + for &pred in self.func.block_preds(block) { + log::debug!( + " -> pred block{} (postorder#{})", + pred.index(), + block_to_postorder[pred.index()].unwrap_or(min_pred as u32) + ); + min_pred = std::cmp::min( + min_pred, + block_to_postorder[pred.index()].unwrap_or(min_pred as u32) as usize, + ); + log::debug!(" -> min_pred = {}", min_pred); + } + if loop_scan == 0 { + break; + } + loop_scan -= 1; + } + + if min_pred < i { + // We have one or more backedges, and the loop body is + // (conservatively) postorder[min_pred..i]. Find a + // range that covers all of those blocks. + let loop_blocks = &self.cfginfo.postorder[min_pred..=i]; + let loop_begin = loop_blocks + .iter() + .map(|b| self.cfginfo.block_entry[b.index()]) + .min() + .unwrap(); + let loop_end = loop_blocks + .iter() + .map(|b| self.cfginfo.block_exit[b.index()]) + .max() + .unwrap(); + let loop_range = CodeRange { + from: loop_begin, + to: loop_end, + }; + log::debug!( + "found backedge wrt postorder: postorder#{}..postorder#{}", + min_pred, + i + ); + log::debug!(" -> loop range {:?}", loop_range); + for &loopblock in loop_blocks { + self.liveins[loopblock.index()].or(&live); + } + for vreg in live.iter() { + log::debug!( + "vreg {:?} live at top of loop (block {:?}) -> range {:?}", + VRegIndex::new(vreg), + block, + loop_range, + ); + self.add_liverange_to_vreg(VRegIndex::new(vreg), loop_range, &mut num_ranges); + } + } + + log::debug!("liveins at block {:?} = {:?}", block, live); + self.liveins[block.index()] = live; + } + + // Do a cleanup pass: if there are any LiveRanges with + // multiple uses (or defs) at the same ProgPoint and there is + // more than one FixedReg constraint at that ProgPoint, we + // need to record all but one of them in a special fixup list + // and handle them later; otherwise, bundle-splitting to + // create minimal bundles becomes much more complex (we would + // have to split the multiple uses at the same progpoint into + // different bundles, which breaks invariants related to + // disjoint ranges and bundles). 
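+        // For example (illustration): if a vreg has FixedReg(r1) and FixedReg(r2) uses at
+        // the same point, the loop below keeps the first constraint, relaxes the duplicate
+        // operand to the plain Reg policy, records a (point, r1, r2) fixup, and reserves
+        // the duplicate's register as a clobber at that instruction.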
+ for vreg in 0..self.vregs.len() { + let mut iter = self.vregs[vreg].first_range; + while iter.is_valid() { + log::debug!( + "multi-fixed-reg cleanup: vreg {:?} range {:?}", + VRegIndex::new(vreg), + iter + ); + let mut last_point = None; + let mut seen_fixed_for_vreg: SmallVec<[VReg; 16]> = smallvec![]; + let mut first_preg: SmallVec<[PRegIndex; 16]> = smallvec![]; + let mut extra_clobbers: SmallVec<[(PReg, Inst); 8]> = smallvec![]; + let mut fixup_multi_fixed_vregs = |pos: ProgPoint, + op: &mut Operand, + fixups: &mut Vec<( + ProgPoint, + PRegIndex, + PRegIndex, + )>| { + if last_point.is_some() && Some(pos) != last_point { + seen_fixed_for_vreg.clear(); + first_preg.clear(); + } + last_point = Some(pos); + + if let OperandPolicy::FixedReg(preg) = op.policy() { + let vreg_idx = VRegIndex::new(op.vreg().vreg()); + let preg_idx = PRegIndex::new(preg.index()); + log::debug!( + "at pos {:?}, vreg {:?} has fixed constraint to preg {:?}", + pos, + vreg_idx, + preg_idx + ); + if let Some(idx) = seen_fixed_for_vreg.iter().position(|r| *r == op.vreg()) + { + let orig_preg = first_preg[idx]; + log::debug!(" -> duplicate; switching to policy Reg"); + fixups.push((pos, orig_preg, preg_idx)); + *op = Operand::new(op.vreg(), OperandPolicy::Reg, op.kind(), op.pos()); + extra_clobbers.push((preg, pos.inst)); + } else { + seen_fixed_for_vreg.push(op.vreg()); + first_preg.push(preg_idx); + } + } + }; + + if self.ranges[iter.index()].def.is_valid() { + let def_idx = self.vregs[vreg].def; + let pos = self.defs[def_idx.index()].pos; + fixup_multi_fixed_vregs( + pos, + &mut self.defs[def_idx.index()].operand, + &mut self.multi_fixed_reg_fixups, + ); + } + + let mut use_iter = self.ranges[iter.index()].first_use; + while use_iter.is_valid() { + let pos = self.uses[use_iter.index()].pos; + fixup_multi_fixed_vregs( + pos, + &mut self.uses[use_iter.index()].operand, + &mut self.multi_fixed_reg_fixups, + ); + use_iter = self.uses[use_iter.index()].next_use; + } + + for (clobber, inst) in extra_clobbers { + let range = CodeRange { + from: ProgPoint::before(inst), + to: ProgPoint::before(inst.next()), + }; + self.add_liverange_to_preg(range, clobber); + } + + iter = self.ranges[iter.index()].next_in_reg; + } + } + + self.clobbers.sort(); + self.blockparam_ins.sort(); + self.blockparam_outs.sort(); + + self.stats.initial_liverange_count = self.ranges.len(); + self.stats.blockparam_ins_count = self.blockparam_ins.len(); + self.stats.blockparam_outs_count = self.blockparam_outs.len(); + } + + fn compute_hot_code(&mut self) { + // Initialize hot_code to contain inner loops only. + let mut header = Block::invalid(); + let mut backedge = Block::invalid(); + for block in 0..self.func.blocks() { + let block = Block::new(block); + let max_backedge = self + .func + .block_preds(block) + .iter() + .filter(|b| b.index() >= block.index()) + .max(); + if let Some(&b) = max_backedge { + header = block; + backedge = b; + } + if block == backedge { + // We've traversed a loop body without finding a deeper loop. Mark the whole body + // as hot. 
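+                // The hot range runs from the header's entry point to just past the
+                // backedge block's exit, and is keyed into the `hot_code` btree so that
+                // find_split_points() can probe it for overlap.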
+ let from = self.cfginfo.block_entry[header.index()]; + let to = self.cfginfo.block_exit[backedge.index()].next(); + let range = CodeRange { from, to }; + let lr = self.create_liverange(range); + self.hot_code + .btree + .insert(LiveRangeKey::from_range(&range), lr); + } + } + } + + fn create_bundle(&mut self) -> LiveBundleIndex { + let bundle = self.bundles.len(); + self.bundles.push(LiveBundle { + allocation: Allocation::none(), + first_range: LiveRangeIndex::invalid(), + last_range: LiveRangeIndex::invalid(), + spillset: SpillSetIndex::invalid(), + prio: 0, + spill_weight_and_props: 0, + }); + LiveBundleIndex::new(bundle) + } + + fn try_merge_reused_register(&mut self, from: VRegIndex, to: VRegIndex) { + log::debug!("try_merge_reused_register: from {:?} to {:?}", from, to); + let def_idx = self.vregs[to.index()].def; + log::debug!(" -> def_idx = {:?}", def_idx); + debug_assert!(def_idx.is_valid()); + let def = &mut self.defs[def_idx.index()]; + let def_point = def.pos; + log::debug!(" -> def_point = {:?}", def_point); + + // Can't merge if def happens at use-point. + if def_point.pos == InstPosition::Before { + return; + } + + // Find the corresponding liverange for the use at the def-point. + let use_lr_at_def = self.find_vreg_liverange_for_pos(from, def_point); + log::debug!(" -> use_lr_at_def = {:?}", use_lr_at_def); + + // If the use is not live at the def (i.e. this inst is its last use), we can merge. + if use_lr_at_def.is_none() { + // Find the bundles and merge. Note that bundles have not been split + // yet so every liverange in the vreg will have the same bundle (so + // no need to look up the proper liverange here). + let from_bundle = self.ranges[self.vregs[from.index()].first_range.index()].bundle; + let to_bundle = self.ranges[self.vregs[to.index()].first_range.index()].bundle; + log::debug!(" -> merging from {:?} to {:?}", from_bundle, to_bundle); + self.merge_bundles(from_bundle, to_bundle); + return; + } + + log::debug!(" -> no merge"); + + // Note: there may be other cases where it would benefit us to split the + // LiveRange and bundle for the input at the def-point, allowing us to + // avoid a copy. However, the cases where this helps in IonMonkey (only + // memory uses after the definition, seemingly) appear to be marginal at + // best. + } + + fn merge_bundles(&mut self, from: LiveBundleIndex, to: LiveBundleIndex) -> bool { + if from == to { + // Merge bundle into self -- trivial merge. + return true; + } + log::debug!( + "merging from bundle{} to bundle{}", + from.index(), + to.index() + ); + + let vreg_from = self.ranges[self.bundles[from.index()].first_range.index()].vreg; + let vreg_to = self.ranges[self.bundles[to.index()].first_range.index()].vreg; + // Both bundles must deal with the same RegClass. All vregs in a bundle + // have to have the same regclass (because bundles start with one vreg + // and all merging happens here) so we can just sample the first vreg of + // each bundle. + if self.vregs[vreg_from.index()].reg.class() != self.vregs[vreg_to.index()].reg.class() { + return false; + } + + // Check for overlap in LiveRanges. + let mut iter0 = self.bundles[from.index()].first_range; + let mut iter1 = self.bundles[to.index()].first_range; + let mut range_count = 0; + while iter0.is_valid() && iter1.is_valid() { + range_count += 1; + if range_count > 200 { + // Limit merge complexity. 
+ return false; + } + + if self.ranges[iter0.index()].range.from >= self.ranges[iter1.index()].range.to { + iter1 = self.ranges[iter1.index()].next_in_bundle; + } else if self.ranges[iter1.index()].range.from >= self.ranges[iter0.index()].range.to { + iter0 = self.ranges[iter0.index()].next_in_bundle; + } else { + // Overlap -- cannot merge. + return false; + } + } + + // If we reach here, then the bundles do not overlap -- merge them! + // We do this with a merge-sort-like scan over both chains, removing + // from `to` (`iter1`) and inserting into `from` (`iter0`). + let mut iter0 = self.bundles[from.index()].first_range; + let mut iter1 = self.bundles[to.index()].first_range; + if iter0.is_invalid() { + // `from` bundle is empty -- trivial merge. + return true; + } + if iter1.is_invalid() { + // `to` bundle is empty -- just move head/tail pointers over from + // `from` and set `bundle` up-link on all ranges. + let head = self.bundles[from.index()].first_range; + let tail = self.bundles[from.index()].last_range; + self.bundles[to.index()].first_range = head; + self.bundles[to.index()].last_range = tail; + self.bundles[from.index()].first_range = LiveRangeIndex::invalid(); + self.bundles[from.index()].last_range = LiveRangeIndex::invalid(); + while iter0.is_valid() { + self.ranges[iter0.index()].bundle = from; + iter0 = self.ranges[iter0.index()].next_in_bundle; + } + return true; + } + + // Two non-empty chains of LiveRanges: traverse both simultaneously and + // merge links into `from`. + let mut prev = LiveRangeIndex::invalid(); + while iter0.is_valid() || iter1.is_valid() { + // Pick the next range. + let next_range_iter = if iter0.is_valid() { + if iter1.is_valid() { + if self.ranges[iter0.index()].range.from + <= self.ranges[iter1.index()].range.from + { + &mut iter0 + } else { + &mut iter1 + } + } else { + &mut iter0 + } + } else { + &mut iter1 + }; + let next = *next_range_iter; + *next_range_iter = self.ranges[next.index()].next_in_bundle; + + // link from prev. + if prev.is_valid() { + self.ranges[prev.index()].next_in_bundle = next; + } else { + self.bundles[to.index()].first_range = next; + } + self.bundles[to.index()].last_range = next; + self.ranges[next.index()].bundle = to; + prev = next; + } + self.bundles[from.index()].first_range = LiveRangeIndex::invalid(); + self.bundles[from.index()].last_range = LiveRangeIndex::invalid(); + + true + } + + fn insert_liverange_into_bundle(&mut self, bundle: LiveBundleIndex, lr: LiveRangeIndex) { + self.ranges[lr.index()].next_in_bundle = LiveRangeIndex::invalid(); + self.ranges[lr.index()].bundle = bundle; + if self.bundles[bundle.index()].first_range.is_invalid() { + // Empty bundle. + self.bundles[bundle.index()].first_range = lr; + self.bundles[bundle.index()].last_range = lr; + } else if self.ranges[self.bundles[bundle.index()].first_range.index()] + .range + .to + <= self.ranges[lr.index()].range.from + { + // After last range in bundle. + let last = self.bundles[bundle.index()].last_range; + self.ranges[last.index()].next_in_bundle = lr; + self.bundles[bundle.index()].last_range = lr; + } else { + // Find location to insert. 
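+            // Walk the chain (the debug_assert below checks that `lr` overlaps none of
+            // it) to pick the link after which `lr` is spliced, falling back to
+            // inserting at the head of the list.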
+ let mut iter = self.bundles[bundle.index()].first_range; + let mut insert_after = LiveRangeIndex::invalid(); + let insert_range = self.ranges[lr.index()].range; + while iter.is_valid() { + debug_assert!(!self.ranges[iter.index()].range.overlaps(&insert_range)); + if self.ranges[iter.index()].range.to <= insert_range.from { + break; + } + insert_after = iter; + iter = self.ranges[iter.index()].next_in_bundle; + } + if insert_after.is_valid() { + self.ranges[insert_after.index()].next_in_bundle = lr; + if self.bundles[bundle.index()].last_range == insert_after { + self.bundles[bundle.index()].last_range = lr; + } + } else { + let next = self.bundles[bundle.index()].first_range; + self.ranges[lr.index()].next_in_bundle = next; + self.bundles[bundle.index()].first_range = lr; + } + } + } + + fn merge_vreg_bundles(&mut self) { + // Create a bundle for every vreg, initially. + log::debug!("merge_vreg_bundles: creating vreg bundles"); + for vreg in 0..self.vregs.len() { + let vreg = VRegIndex::new(vreg); + if self.vregs[vreg.index()].first_range.is_invalid() { + continue; + } + let bundle = self.create_bundle(); + let mut range = self.vregs[vreg.index()].first_range; + while range.is_valid() { + self.insert_liverange_into_bundle(bundle, range); + range = self.ranges[range.index()].next_in_reg; + } + log::debug!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); + } + + for inst in 0..self.func.insts() { + let inst = Inst::new(inst); + + // Attempt to merge Reuse-policy operand outputs with the corresponding + // inputs. + for operand_idx in 0..self.func.inst_operands(inst).len() { + let operand = self.func.inst_operands(inst)[operand_idx]; + if let OperandPolicy::Reuse(input_idx) = operand.policy() { + log::debug!( + "trying to merge use and def at reused-op {} on inst{}", + operand_idx, + inst.index() + ); + assert_eq!(operand.kind(), OperandKind::Def); + assert_eq!(operand.pos(), OperandPos::After); + let input_vreg = + VRegIndex::new(self.func.inst_operands(inst)[input_idx].vreg().vreg()); + let output_vreg = VRegIndex::new(operand.vreg().vreg()); + self.try_merge_reused_register(input_vreg, output_vreg); + } + } + + // Attempt to merge move srcs and dests. + if let Some((src_vreg, dst_vreg)) = self.func.is_move(inst) { + log::debug!("trying to merge move src {} to dst {}", src_vreg, dst_vreg); + let src_bundle = + self.ranges[self.vregs[src_vreg.vreg()].first_range.index()].bundle; + assert!(src_bundle.is_valid()); + let dest_bundle = + self.ranges[self.vregs[dst_vreg.vreg()].first_range.index()].bundle; + assert!(dest_bundle.is_valid()); + self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); + } + } + + // Attempt to merge blockparams with their inputs. + for i in 0..self.blockparam_outs.len() { + let (from_vreg, _, _, to_vreg) = self.blockparam_outs[i]; + log::debug!( + "trying to merge blockparam v{} with input v{}", + to_vreg.index(), + from_vreg.index() + ); + let to_bundle = self.ranges[self.vregs[to_vreg.index()].first_range.index()].bundle; + assert!(to_bundle.is_valid()); + let from_bundle = self.ranges[self.vregs[from_vreg.index()].first_range.index()].bundle; + assert!(from_bundle.is_valid()); + log::debug!( + " -> from bundle{} to bundle{}", + from_bundle.index(), + to_bundle.index() + ); + self.merge_bundles(from_bundle, to_bundle); + } + + log::debug!("done merging bundles"); + } + + fn compute_bundle_prio(&self, bundle: LiveBundleIndex) -> u32 { + // The priority is simply the total "length" -- the number of + // instructions covered by all LiveRanges. 
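+        // (I.e., the sum of `range.len()` over the bundle's live-range chain.)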
+ let mut iter = self.bundles[bundle.index()].first_range; + let mut total = 0; + while iter.is_valid() { + total += self.ranges[iter.index()].range.len() as u32; + iter = self.ranges[iter.index()].next_in_bundle; + } + total + } + + fn queue_bundles(&mut self) { + for vreg in 0..self.vregs.len() { + let vreg = VRegIndex::new(vreg); + let mut lr = self.vregs[vreg.index()].first_range; + while lr.is_valid() { + let bundle = self.ranges[lr.index()].bundle; + if self.bundles[bundle.index()].first_range == lr { + // First time seeing `bundle`: allocate a spillslot for it, + // compute its priority, and enqueue it. + let ssidx = SpillSetIndex::new(self.spillsets.len()); + let reg = self.vregs[vreg.index()].reg; + let size = self.func.spillslot_size(reg.class(), reg) as u32; + self.spillsets.push(SpillSet { + bundles: smallvec![], + slot: SpillSlotIndex::invalid(), + size, + class: reg.class(), + reg_hint: None, + }); + self.bundles[bundle.index()].spillset = ssidx; + let prio = self.compute_bundle_prio(bundle); + self.bundles[bundle.index()].prio = prio; + self.recompute_bundle_properties(bundle); + self.allocation_queue.insert(bundle, prio as usize); + } + + // Keep going even if we handled one bundle for this vreg above: + // if we split a vreg's liveranges into multiple bundles, we + // need to hit all the bundles. + lr = self.ranges[lr.index()].next_in_bundle; + } + } + + self.stats.merged_bundle_count = self.allocation_queue.heap.len(); + } + + fn process_bundles(&mut self) { + let mut count = 0; + while let Some(bundle) = self.allocation_queue.pop() { + self.stats.process_bundle_count += 1; + self.process_bundle(bundle); + count += 1; + if count > self.func.insts() * 50 { + self.dump_state(); + panic!("Infinite loop!"); + } + } + self.stats.final_liverange_count = self.ranges.len(); + self.stats.final_bundle_count = self.bundles.len(); + self.stats.spill_bundle_count = self.spilled_bundles.len(); + } + + fn dump_state(&self) { + log::debug!("Bundles:"); + for (i, b) in self.bundles.iter().enumerate() { + log::debug!( + "bundle{}: first_range={:?} last_range={:?} spillset={:?} alloc={:?}", + i, + b.first_range, + b.last_range, + b.spillset, + b.allocation + ); + } + log::debug!("VRegs:"); + for (i, v) in self.vregs.iter().enumerate() { + log::debug!("vreg{}: def={:?} first_range={:?}", i, v.def, v.first_range,); + } + log::debug!("Ranges:"); + for (i, r) in self.ranges.iter().enumerate() { + log::debug!( + concat!( + "range{}: range={:?} vreg={:?} bundle={:?} ", + "weight={} fixed={} first_use={:?} last_use={:?} ", + "def={:?} next_in_bundle={:?} next_in_reg={:?}" + ), + i, + r.range, + r.vreg, + r.bundle, + r.uses_spill_weight, + r.num_fixed_uses(), + r.first_use, + r.last_use, + r.def, + r.next_in_bundle, + r.next_in_reg + ); + } + log::debug!("Uses:"); + for (i, u) in self.uses.iter().enumerate() { + log::debug!( + "use{}: op={:?} pos={:?} slot={} next_use={:?}", + i, + u.operand, + u.pos, + u.slot, + u.next_use + ); + } + log::debug!("Defs:"); + for (i, d) in self.defs.iter().enumerate() { + log::debug!("def{}: op={:?} pos={:?}", i, d.operand, d.pos,); + } + } + + fn compute_requirement(&self, bundle: LiveBundleIndex) -> Option { + let class = self.vregs[self.ranges[self.bundles[bundle.index()].first_range.index()] + .vreg + .index()] + .reg + .class(); + let mut needed = Requirement::Any(class); + + log::debug!("compute_requirement: bundle {:?} class {:?}", bundle, class); + + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + let range = 
&self.ranges[iter.index()]; + log::debug!(" -> range {:?}", range.range); + if range.def.is_valid() { + let def_op = self.defs[range.def.index()].operand; + let def_req = Requirement::from_operand(def_op); + log::debug!( + " -> def {:?} op {:?} req {:?}", + range.def.index(), + def_op, + def_req + ); + needed = needed.merge(def_req)?; + log::debug!(" -> needed {:?}", needed); + } + let mut use_iter = range.first_use; + while use_iter.is_valid() { + let usedata = &self.uses[use_iter.index()]; + let use_op = usedata.operand; + let use_req = Requirement::from_operand(use_op); + log::debug!(" -> use {:?} op {:?} req {:?}", use_iter, use_op, use_req); + needed = needed.merge(use_req)?; + log::debug!(" -> needed {:?}", needed); + use_iter = usedata.next_use; + } + iter = range.next_in_bundle; + } + + log::debug!(" -> final needed: {:?}", needed); + Some(needed) + } + + fn try_to_allocate_bundle_to_reg( + &mut self, + bundle: LiveBundleIndex, + reg: PRegIndex, + ) -> AllocRegResult { + log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); + let mut conflicts = smallvec![]; + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + let range = &self.ranges[iter.index()]; + log::debug!(" -> range {:?}", range); + // Note that the comparator function here tests for *overlap*, so we + // are checking whether the BTree contains any preg range that + // *overlaps* with range `iter`, not literally the range `iter`. + if let Some(preg_range) = self.pregs[reg.index()] + .allocations + .btree + .get(&LiveRangeKey::from_range(&range.range)) + { + log::debug!(" -> btree contains range {:?} that overlaps", preg_range); + if self.ranges[preg_range.index()].vreg.is_valid() { + log::debug!(" -> from vreg {:?}", self.ranges[preg_range.index()].vreg); + // range from an allocated bundle: find the bundle and add to + // conflicts list. + let conflict_bundle = self.ranges[preg_range.index()].bundle; + log::debug!(" -> conflict bundle {:?}", conflict_bundle); + if !conflicts.iter().any(|b| *b == conflict_bundle) { + conflicts.push(conflict_bundle); + } + } else { + log::debug!(" -> conflict with fixed reservation"); + // range from a direct use of the PReg (due to clobber). + return AllocRegResult::ConflictWithFixed; + } + } + iter = range.next_in_bundle; + } + + if conflicts.len() > 0 { + return AllocRegResult::Conflict(conflicts); + } + + // We can allocate! Add our ranges to the preg's BTree. + let preg = self.pregs[reg.index()].reg; + log::debug!(" -> bundle {:?} assigned to preg {:?}", bundle, preg); + self.bundles[bundle.index()].allocation = Allocation::reg(preg); + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + let range = &self.ranges[iter.index()]; + self.pregs[reg.index()] + .allocations + .btree + .insert(LiveRangeKey::from_range(&range.range), iter); + iter = range.next_in_bundle; + } + + AllocRegResult::Allocated(Allocation::reg(preg)) + } + + fn evict_bundle(&mut self, bundle: LiveBundleIndex) { + log::debug!( + "evicting bundle {:?}: alloc {:?}", + bundle, + self.bundles[bundle.index()].allocation + ); + let preg = match self.bundles[bundle.index()].allocation.as_reg() { + Some(preg) => preg, + None => { + log::debug!( + " -> has no allocation! 
{:?}", + self.bundles[bundle.index()].allocation + ); + return; + } + }; + let preg_idx = PRegIndex::new(preg.index()); + self.bundles[bundle.index()].allocation = Allocation::none(); + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + log::debug!(" -> removing LR {:?} from reg {:?}", iter, preg_idx); + self.pregs[preg_idx.index()] + .allocations + .btree + .remove(&LiveRangeKey::from_range(&self.ranges[iter.index()].range)); + iter = self.ranges[iter.index()].next_in_bundle; + } + let prio = self.bundles[bundle.index()].prio; + log::debug!(" -> prio {}; back into queue", prio); + self.allocation_queue.insert(bundle, prio as usize); + } + + fn bundle_spill_weight(&self, bundle: LiveBundleIndex) -> u32 { + self.bundles[bundle.index()].cached_spill_weight() + } + + fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 { + bundles + .iter() + .map(|&b| self.bundles[b.index()].cached_spill_weight()) + .max() + .unwrap_or(0) + } + + fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { + let minimal; + let mut fixed = false; + let bundledata = &self.bundles[bundle.index()]; + let first_range = &self.ranges[bundledata.first_range.index()]; + + if first_range.vreg.is_invalid() { + minimal = true; + fixed = true; + } else { + if first_range.def.is_valid() { + let def_data = &self.defs[first_range.def.index()]; + if let OperandPolicy::FixedReg(_) = def_data.operand.policy() { + fixed = true; + } + } + let mut use_iter = first_range.first_use; + while use_iter.is_valid() { + let use_data = &self.uses[use_iter.index()]; + if let OperandPolicy::FixedReg(_) = use_data.operand.policy() { + fixed = true; + break; + } + use_iter = use_data.next_use; + } + // Minimal if this is the only range in the bundle, and if + // the range covers only one instruction. Note that it + // could cover just one ProgPoint, i.e. X.Before..X.After, + // or two ProgPoints, i.e. X.Before..X+1.Before. + minimal = first_range.next_in_bundle.is_invalid() + && first_range.range.from.inst == first_range.range.to.prev().inst; + } + + let spill_weight = if minimal { + if fixed { + log::debug!(" -> fixed and minimal: 2000000"); + 2_000_000 + } else { + log::debug!(" -> non-fixed and minimal: 1000000"); + 1_000_000 + } + } else { + let mut total = 0; + let mut range = self.bundles[bundle.index()].first_range; + while range.is_valid() { + let range_data = &self.ranges[range.index()]; + if range_data.def.is_valid() { + log::debug!(" -> has def (2000)"); + total += 2000; + } + log::debug!(" -> uses spill weight: {}", range_data.uses_spill_weight); + total += range_data.uses_spill_weight; + range = range_data.next_in_bundle; + } + + if self.bundles[bundle.index()].prio > 0 { + total / self.bundles[bundle.index()].prio + } else { + total + } + }; + + self.bundles[bundle.index()].set_cached_spill_weight_and_props( + spill_weight, + minimal, + fixed, + ); + } + + fn minimal_bundle(&mut self, bundle: LiveBundleIndex) -> bool { + self.bundles[bundle.index()].cached_minimal() + } + + fn find_split_points( + &mut self, + bundle: LiveBundleIndex, + conflicting: LiveBundleIndex, + ) -> SmallVec<[ProgPoint; 4]> { + // Scan the bundle's ranges once. We want to record: + // - Does the bundle contain any ranges in "hot" code and/or "cold" code? 
+ // If so, record the transition points that are fully included in + // `bundle`: the first ProgPoint in a hot range if the prior cold + // point is also in the bundle; and the first ProgPoint in a cold + // range if the prior hot point is also in the bundle. + // - Does the bundle cross any clobbering insts? + // If so, record the ProgPoint before each such instruction. + // - Is there a register use before the conflicting bundle? + // If so, record the ProgPoint just after the last one. + // - Is there a register use after the conflicting bundle? + // If so, record the ProgPoint just before the last one. + // + // Then choose one of the above kinds of splits, in priority order. + + let mut cold_hot_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; + let mut clobber_splits: SmallVec<[ProgPoint; 4]> = smallvec![]; + let mut last_before_conflict: Option = None; + let mut first_after_conflict: Option = None; + + log::debug!( + "find_split_points: bundle {:?} conflicting {:?}", + bundle, + conflicting + ); + + // We simultaneously scan the sorted list of LiveRanges in our bundle + // and the sorted list of call instruction locations. We also take the + // total range (start of first range to end of last range) of the + // conflicting bundle, if any, so we can find the last use before it and + // first use after it. Each loop iteration handles one range in our + // bundle. Calls are scanned up until they advance past the current + // range. + let mut our_iter = self.bundles[bundle.index()].first_range; + let (conflict_from, conflict_to) = if conflicting.is_valid() { + ( + Some( + self.ranges[self.bundles[conflicting.index()].first_range.index()] + .range + .from, + ), + Some( + self.ranges[self.bundles[conflicting.index()].last_range.index()] + .range + .to, + ), + ) + } else { + (None, None) + }; + + let bundle_start = if self.bundles[bundle.index()].first_range.is_valid() { + self.ranges[self.bundles[bundle.index()].first_range.index()] + .range + .from + } else { + ProgPoint::before(Inst::new(0)) + }; + let bundle_end = if self.bundles[bundle.index()].last_range.is_valid() { + self.ranges[self.bundles[bundle.index()].last_range.index()] + .range + .to + } else { + ProgPoint::before(Inst::new(self.func.insts())) + }; + + log::debug!(" -> conflict from {:?} to {:?}", conflict_from, conflict_to); + let mut clobberidx = 0; + while our_iter.is_valid() { + // Probe the hot-code tree. + let our_range = self.ranges[our_iter.index()].range; + log::debug!(" -> range {:?}", our_range); + if let Some(hot_range_idx) = self + .hot_code + .btree + .get(&LiveRangeKey::from_range(&our_range)) + { + // `hot_range_idx` is a range that *overlaps* with our range. + + // There may be cold code in our range on either side of the hot + // range. Record the transition points if so. + let hot_range = self.ranges[hot_range_idx.index()].range; + log::debug!(" -> overlaps with hot-code range {:?}", hot_range); + let start_cold = our_range.from < hot_range.from; + let end_cold = our_range.to > hot_range.to; + if start_cold { + log::debug!( + " -> our start is cold; potential split at cold->hot transition {:?}", + hot_range.from, + ); + // First ProgPoint in hot range. + cold_hot_splits.push(hot_range.from); + } + if end_cold { + log::debug!( + " -> our end is cold; potential split at hot->cold transition {:?}", + hot_range.to, + ); + // First ProgPoint in cold range (after hot range). 
+ cold_hot_splits.push(hot_range.to); + } + } + + // Scan through clobber-insts from last left-off position until the first + // clobbering inst past this range. Record all clobber sites as potential + // splits. + while clobberidx < self.clobbers.len() { + let cur_clobber = self.clobbers[clobberidx]; + let pos = ProgPoint::before(cur_clobber); + if pos >= our_range.to { + break; + } + clobberidx += 1; + if pos < our_range.from { + continue; + } + if pos > bundle_start { + log::debug!(" -> potential clobber split at {:?}", pos); + clobber_splits.push(pos); + } + } + + // Update last-before-conflict and first-before-conflict positions. + + let mut update_with_pos = |pos: ProgPoint| { + let before_inst = ProgPoint::before(pos.inst); + let before_next_inst = before_inst.next().next(); + if before_inst > bundle_start + && (conflict_from.is_none() || before_inst < conflict_from.unwrap()) + && (last_before_conflict.is_none() + || before_inst > last_before_conflict.unwrap()) + { + last_before_conflict = Some(before_inst); + } + if before_next_inst < bundle_end + && (conflict_to.is_none() || pos >= conflict_to.unwrap()) + && (first_after_conflict.is_none() || pos > first_after_conflict.unwrap()) + { + first_after_conflict = Some(ProgPoint::before(pos.inst.next())); + } + }; + + if self.ranges[our_iter.index()].def.is_valid() { + let def_data = &self.defs[self.ranges[our_iter.index()].def.index()]; + log::debug!(" -> range has def at {:?}", def_data.pos); + update_with_pos(def_data.pos); + } + let mut use_idx = self.ranges[our_iter.index()].first_use; + while use_idx.is_valid() { + let use_data = &self.uses[use_idx.index()]; + log::debug!(" -> range has use at {:?}", use_data.pos); + update_with_pos(use_data.pos); + use_idx = use_data.next_use; + } + + our_iter = self.ranges[our_iter.index()].next_in_bundle; + } + log::debug!( + " -> first use/def after conflict range: {:?}", + first_after_conflict, + ); + log::debug!( + " -> last use/def before conflict range: {:?}", + last_before_conflict, + ); + + // Based on the above, we can determine which split strategy we are taking at this + // iteration: + // - If we span both hot and cold code, split into separate "hot" and "cold" bundles. + // - Otherwise, if we span any calls, split just before every call instruction. + // - Otherwise, if there is a register use after the conflicting bundle, + // split at that use-point ("split before first use"). + // - Otherwise, if there is a register use before the conflicting + // bundle, split at that use-point ("split after last use"). + // - Otherwise, split at every use, to form minimal bundles. 
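+        // The if/else chain below applies exactly this priority order: cold/hot
+        // transitions, then clobber sites, then first-use-after-conflict, then
+        // last-use-before-conflict, then all use/def points.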
+ + if cold_hot_splits.len() > 0 { + log::debug!(" going with cold/hot splits: {:?}", cold_hot_splits); + self.stats.splits_hot += 1; + cold_hot_splits + } else if clobber_splits.len() > 0 { + log::debug!(" going with clobber splits: {:?}", clobber_splits); + self.stats.splits_clobbers += 1; + clobber_splits + } else if first_after_conflict.is_some() { + self.stats.splits_conflicts += 1; + log::debug!(" going with first after conflict"); + smallvec![first_after_conflict.unwrap()] + } else if last_before_conflict.is_some() { + self.stats.splits_conflicts += 1; + log::debug!(" going with last before conflict"); + smallvec![last_before_conflict.unwrap()] + } else { + self.stats.splits_all += 1; + log::debug!(" splitting at all uses"); + self.find_all_use_split_points(bundle) + } + } + + fn find_all_use_split_points(&self, bundle: LiveBundleIndex) -> SmallVec<[ProgPoint; 4]> { + let mut splits = smallvec![]; + let mut iter = self.bundles[bundle.index()].first_range; + log::debug!("finding all use/def splits for {:?}", bundle); + let (bundle_start, bundle_end) = if iter.is_valid() { + ( + self.ranges[iter.index()].range.from, + self.ranges[self.bundles[bundle.index()].last_range.index()] + .range + .to, + ) + } else { + ( + ProgPoint::before(Inst::new(0)), + ProgPoint::after(Inst::new(self.func.insts() - 1)), + ) + }; + // N.B.: a minimal bundle must include only ProgPoints in a + // single instruction, but can include both (can include two + // ProgPoints). We split here, taking care to never split *in + // the middle* of an instruction, because we would not be able + // to insert moves to reify such an assignment. + while iter.is_valid() { + let rangedata = &self.ranges[iter.index()]; + log::debug!(" -> range {:?}: {:?}", iter, rangedata.range); + if rangedata.def.is_valid() { + // Split both before and after def (make it a minimal bundle). + let def_pos = self.defs[rangedata.def.index()].pos; + let def_end = ProgPoint::before(def_pos.inst.next()); + log::debug!( + " -> splitting before and after def: {:?} and {:?}", + def_pos, + def_end, + ); + if def_pos > bundle_start { + splits.push(def_pos); + } + if def_end < bundle_end { + splits.push(def_end); + } + } + let mut use_idx = rangedata.first_use; + while use_idx.is_valid() { + let use_data = &self.uses[use_idx.index()]; + let before_use_inst = ProgPoint::before(use_data.pos.inst); + let after_use_inst = before_use_inst.next().next(); + log::debug!( + " -> splitting before and after use: {:?} and {:?}", + before_use_inst, + after_use_inst, + ); + if before_use_inst > bundle_start { + splits.push(before_use_inst); + } + splits.push(after_use_inst); + use_idx = use_data.next_use; + } + + iter = rangedata.next_in_bundle; + } + splits.sort(); + log::debug!(" -> final splits: {:?}", splits); + splits + } + + fn split_and_requeue_bundle( + &mut self, + bundle: LiveBundleIndex, + first_conflicting_bundle: LiveBundleIndex, + ) { + self.stats.splits += 1; + // Try splitting: (i) across hot code; (ii) across all calls, + // if we had a fixed-reg conflict; (iii) before first reg use; + // (iv) after reg use; (v) around all register uses. After + // each type of split, check for conflict with conflicting + // bundle(s); stop when no conflicts. In all cases, re-queue + // the split bundles on the allocation queue. + // + // The critical property here is that we must eventually split + // down to minimal bundles, which consist just of live ranges + // around each individual def/use (this is step (v) + // above). This ensures termination eventually. 
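+        // (process_bundle() debug-asserts that the bundle is not minimal before calling
+        // here, so a minimal bundle is never re-split.)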
+ + let split_points = self.find_split_points(bundle, first_conflicting_bundle); + log::debug!( + "split bundle {:?} (conflict {:?}): split points {:?}", + bundle, + first_conflicting_bundle, + split_points + ); + + // Split `bundle` at every ProgPoint in `split_points`, + // creating new LiveRanges and bundles (and updating vregs' + // linked lists appropriately), and enqueue the new bundles. + // + // We uphold several basic invariants here: + // - The LiveRanges in every vreg, and in every bundle, are disjoint + // - Every bundle for a given vreg is disjoint + // + // To do so, we make one scan in program order: all ranges in + // the bundle, and the def/all uses in each range. We track + // the currently active bundle. For each range, we distribute + // its uses among one or more ranges, depending on whether it + // crosses any split points. If we had to split a range, then + // we need to insert the new subparts in its vreg as + // well. N.B.: to avoid the need to *remove* ranges from vregs + // (which we could not do without a lookup, since we use + // singly-linked lists and the bundle may contain multiple + // vregs so we cannot simply scan a single vreg simultaneously + // to the main scan), we instead *trim* the existing range + // into its first subpart, and then create the new + // subparts. Note that shrinking a LiveRange is always legal + // (as long as one replaces the shrunk space with new + // LiveRanges). + // + // Note that the original IonMonkey splitting code is quite a + // bit more complex and has some subtle invariants. We stick + // to the above invariants to keep this code maintainable. + + let mut split_idx = 0; + + // Fast-forward past any splits that occur before or exactly + // at the start of the first range in the bundle. + let first_range = self.bundles[bundle.index()].first_range; + let bundle_start = if first_range.is_valid() { + self.ranges[first_range.index()].range.from + } else { + ProgPoint::before(Inst::new(0)) + }; + while split_idx < split_points.len() && split_points[split_idx] <= bundle_start { + split_idx += 1; + } + + let mut new_bundles: LiveBundleVec = smallvec![]; + let mut cur_bundle = bundle; + let mut iter = self.bundles[bundle.index()].first_range; + self.bundles[bundle.index()].first_range = LiveRangeIndex::invalid(); + self.bundles[bundle.index()].last_range = LiveRangeIndex::invalid(); + while iter.is_valid() { + // Read `next` link now and then clear it -- we rebuild the list below. + let next = self.ranges[iter.index()].next_in_bundle; + self.ranges[iter.index()].next_in_bundle = LiveRangeIndex::invalid(); + + let mut range = self.ranges[iter.index()].range; + log::debug!(" -> has range {:?} (LR {:?})", range, iter); + + // If any splits occur before this range, create a new + // bundle, then advance to the first split within the + // range. + if split_idx < split_points.len() && split_points[split_idx] <= range.from { + log::debug!(" -> split before a range; creating new bundle"); + cur_bundle = self.create_bundle(); + self.bundles[cur_bundle.index()].spillset = self.bundles[bundle.index()].spillset; + new_bundles.push(cur_bundle); + split_idx += 1; + } + while split_idx < split_points.len() && split_points[split_idx] <= range.from { + split_idx += 1; + } + + // Link into current bundle. 
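+            // (Append this range to `cur_bundle`'s chain, updating the bundle's
+            // first/last range pointers.)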
+ self.ranges[iter.index()].bundle = cur_bundle; + if self.bundles[cur_bundle.index()].first_range.is_valid() { + self.ranges[self.bundles[cur_bundle.index()].last_range.index()].next_in_bundle = + iter; + } else { + self.bundles[cur_bundle.index()].first_range = iter; + } + self.bundles[cur_bundle.index()].last_range = iter; + + // While the next split point is beyond the start of the + // range and before the end, shorten the current LiveRange + // (this is always legal) and create a new Bundle and + // LiveRange for the remainder. Truncate the old bundle + // (set last_range). Insert the LiveRange into the vreg + // and into the new bundle. Then move the use-chain over, + // splitting at the appropriate point. + // + // We accumulate the use stats (fixed-use count and spill + // weight) as we scan through uses, recomputing the values + // for the truncated initial LiveRange and taking the + // remainders for the split "rest" LiveRange. + + while split_idx < split_points.len() && split_points[split_idx] < range.to { + let split_point = split_points[split_idx]; + split_idx += 1; + + // Skip forward to the current range. + if split_point <= range.from { + continue; + } + + log::debug!( + " -> processing split point {:?} with iter {:?}", + split_point, + iter + ); + + // We split into `first` and `rest`. `rest` may be + // further subdivided in subsequent iterations; we + // only do one split per iteration. + debug_assert!(range.from < split_point && split_point < range.to); + let rest_range = CodeRange { + from: split_point, + to: self.ranges[iter.index()].range.to, + }; + self.ranges[iter.index()].range.to = split_point; + range = rest_range; + log::debug!( + " -> range of {:?} now {:?}", + iter, + self.ranges[iter.index()].range + ); + + // Create the rest-range and insert it into the vreg's + // range list. (Note that the vreg does not keep a + // tail-pointer so we do not need to update that.) + let rest_lr = self.create_liverange(rest_range); + self.ranges[rest_lr.index()].vreg = self.ranges[iter.index()].vreg; + self.ranges[rest_lr.index()].next_in_reg = self.ranges[iter.index()].next_in_reg; + self.ranges[iter.index()].next_in_reg = rest_lr; + + log::debug!( + " -> split tail to new LR {:?} with range {:?}", + rest_lr, + rest_range + ); + + // Scan over uses, accumulating stats for those that + // stay in the first range, finding the first use that + // moves to the rest range. + let mut last_use_in_first_range = UseIndex::invalid(); + let mut use_iter = self.ranges[iter.index()].first_use; + let mut num_fixed_uses = 0; + let mut uses_spill_weight = 0; + while use_iter.is_valid() { + if self.uses[use_iter.index()].pos >= split_point { + break; + } + last_use_in_first_range = use_iter; + let policy = self.uses[use_iter.index()].operand.policy(); + log::debug!( + " -> use {:?} before split point; policy {:?}", + use_iter, + policy + ); + if let OperandPolicy::FixedReg(_) = policy { + num_fixed_uses += 1; + } + uses_spill_weight += spill_weight_from_policy(policy); + log::debug!(" -> use {:?} remains in orig", use_iter); + use_iter = self.uses[use_iter.index()].next_use; + } + + // Move over `rest`'s uses and update stats on first + // and rest LRs. 
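+                // `use_iter` now points at the first use at or past the split point (if
+                // any); that suffix of the use list moves to `rest_lr`, while the stats
+                // accumulated above stay with the trimmed first range and the remainder
+                // goes to `rest_lr`.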
+ if use_iter.is_valid() { + log::debug!( + " -> moving uses over the split starting at {:?}", + use_iter + ); + self.ranges[rest_lr.index()].first_use = use_iter; + self.ranges[rest_lr.index()].last_use = self.ranges[iter.index()].last_use; + + self.ranges[iter.index()].last_use = last_use_in_first_range; + if last_use_in_first_range.is_valid() { + self.uses[last_use_in_first_range.index()].next_use = UseIndex::invalid(); + } else { + self.ranges[iter.index()].first_use = UseIndex::invalid(); + } + + let rest_fixed_uses = + self.ranges[iter.index()].num_fixed_uses() - num_fixed_uses; + self.ranges[rest_lr.index()].set_num_fixed_uses(rest_fixed_uses); + self.ranges[rest_lr.index()].uses_spill_weight = + self.ranges[iter.index()].uses_spill_weight - uses_spill_weight; + self.ranges[iter.index()].set_num_fixed_uses(num_fixed_uses); + self.ranges[iter.index()].uses_spill_weight = uses_spill_weight; + } + + // Move over def, if appropriate. + if self.ranges[iter.index()].def.is_valid() { + let def_idx = self.ranges[iter.index()].def; + let def_pos = self.defs[def_idx.index()].pos; + log::debug!(" -> range {:?} has def at {:?}", iter, def_pos); + if def_pos >= split_point { + log::debug!(" -> transferring def bit to {:?}", rest_lr); + self.ranges[iter.index()].def = DefIndex::invalid(); + self.ranges[rest_lr.index()].def = def_idx; + } + } + + log::debug!( + " -> range {:?} next-in-bundle is {:?}", + iter, + self.ranges[iter.index()].next_in_bundle + ); + + // Create a new bundle to hold the rest-range. + let rest_bundle = self.create_bundle(); + cur_bundle = rest_bundle; + new_bundles.push(rest_bundle); + self.bundles[rest_bundle.index()].first_range = rest_lr; + self.bundles[rest_bundle.index()].last_range = rest_lr; + self.bundles[rest_bundle.index()].spillset = self.bundles[bundle.index()].spillset; + self.ranges[rest_lr.index()].bundle = rest_bundle; + log::debug!(" -> new bundle {:?} for LR {:?}", rest_bundle, rest_lr); + + iter = rest_lr; + } + + iter = next; + } + + // Enqueue all split-bundles on the allocation queue. + let prio = self.compute_bundle_prio(bundle); + self.bundles[bundle.index()].prio = prio; + self.recompute_bundle_properties(bundle); + self.allocation_queue.insert(bundle, prio as usize); + for b in new_bundles { + let prio = self.compute_bundle_prio(b); + self.bundles[b.index()].prio = prio; + self.recompute_bundle_properties(b); + self.allocation_queue.insert(b, prio as usize); + } + } + + fn process_bundle(&mut self, bundle: LiveBundleIndex) { + // Find any requirements: for every LR, for every def/use, gather + // requirements (fixed-reg, any-reg, any) and merge them. + let req = self.compute_requirement(bundle); + // Grab a hint from our spillset, if any. + let hint_reg = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint; + log::debug!( + "process_bundle: bundle {:?} requirement {:?} hint {:?}", + bundle, + req, + hint_reg, + ); + + // Try to allocate! + let mut attempts = 0; + let mut first_conflicting_bundle; + loop { + attempts += 1; + debug_assert!(attempts < 100 * self.func.insts()); + first_conflicting_bundle = None; + let req = match req { + Some(r) => r, + // `None` means conflicting requirements, hence impossible to + // allocate. 
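+                // (Breaking out of the retry loop falls through to splitting below.)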
+ None => break, + }; + + let conflicting_bundles = match req { + Requirement::Fixed(preg) => { + let preg_idx = PRegIndex::new(preg.index()); + self.stats.process_bundle_reg_probes_fixed += 1; + match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { + AllocRegResult::Allocated(alloc) => { + self.stats.process_bundle_reg_success_fixed += 1; + log::debug!(" -> allocated to fixed {:?}", preg_idx); + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .reg_hint = Some(alloc.as_reg().unwrap()); + return; + } + AllocRegResult::Conflict(bundles) => bundles, + AllocRegResult::ConflictWithFixed => { + // Empty conflicts set: there's nothing we can + // evict, because fixed conflicts cannot be moved. + smallvec![] + } + } + } + Requirement::Register(class) => { + // Scan all pregs and attempt to allocate. + let mut lowest_cost_conflict_set: Option = None; + let n_regs = self.env.regs_by_class[class as u8 as usize].len(); + let loop_count = if hint_reg.is_some() { + n_regs + 1 + } else { + n_regs + }; + for i in 0..loop_count { + // The order in which we try registers is somewhat complex: + // - First, if there is a hint, we try that. + // - Then, we try registers in a traversal + // order that is based on the bundle index, + // spreading pressure evenly among registers + // to reduce commitment-map + // contention. (TODO: account for + // caller-save vs. callee-saves here too.) + // Note that we avoid retrying the hint_reg; + // this is why the loop count is n_regs + 1 + // if there is a hint reg, because we always + // skip one iteration. + let preg = match (i, hint_reg) { + (0, Some(hint_reg)) => hint_reg, + (i, Some(hint_reg)) => { + let reg = self.env.regs_by_class[class as u8 as usize] + [(i - 1 + bundle.index()) % n_regs]; + if reg == hint_reg { + continue; + } + reg + } + (i, None) => { + self.env.regs_by_class[class as u8 as usize] + [(i + bundle.index()) % n_regs] + } + }; + + self.stats.process_bundle_reg_probes_any += 1; + let preg_idx = PRegIndex::new(preg.index()); + match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { + AllocRegResult::Allocated(alloc) => { + self.stats.process_bundle_reg_success_any += 1; + log::debug!(" -> allocated to any {:?}", preg_idx); + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .reg_hint = Some(alloc.as_reg().unwrap()); + return; + } + AllocRegResult::Conflict(bundles) => { + if lowest_cost_conflict_set.is_none() { + lowest_cost_conflict_set = Some(bundles); + } else if self.maximum_spill_weight_in_bundle_set(&bundles) + < self.maximum_spill_weight_in_bundle_set( + lowest_cost_conflict_set.as_ref().unwrap(), + ) + { + lowest_cost_conflict_set = Some(bundles); + } + } + AllocRegResult::ConflictWithFixed => { + // Simply don't consider as an option. + } + } + } + + // Otherwise, we *require* a register, but didn't fit into + // any with current bundle assignments. Hence, we will need + // to either split or attempt to evict some bundles. Return + // the conflicting bundles to evict and retry. Empty list + // means nothing to try (due to fixed conflict) so we must + // split instead. + lowest_cost_conflict_set.unwrap_or(smallvec![]) + } + + Requirement::Any(_) => { + // If a register is not *required*, spill now (we'll retry + // allocation on spilled bundles later). 
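+                    // (try_allocating_regs_for_spilled_bundles() later probes registers
+                    // once more and, failing that, hands the bundle to its spillset for
+                    // a spill slot.)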
+ log::debug!("spilling bundle {:?} to spilled_bundles list", bundle); + self.spilled_bundles.push(bundle); + return; + } + }; + + log::debug!(" -> conflict set {:?}", conflicting_bundles); + + // If we have already tried evictions once before and are still unsuccessful, give up + // and move on to splitting as long as this is not a minimal bundle. + if attempts >= 2 && !self.minimal_bundle(bundle) { + break; + } + + // If we hit a fixed conflict, give up and move on to splitting. + if conflicting_bundles.is_empty() { + break; + } + + first_conflicting_bundle = Some(conflicting_bundles[0]); + + // If the maximum spill weight in the conflicting-bundles set is >= this bundle's spill + // weight, then don't evict. + if self.maximum_spill_weight_in_bundle_set(&conflicting_bundles) + >= self.bundle_spill_weight(bundle) + { + log::debug!(" -> we're already the cheapest bundle to spill -- going to split"); + break; + } + + // Evict all bundles in `conflicting bundles` and try again. + self.stats.evict_bundle_event += 1; + for &bundle in &conflicting_bundles { + log::debug!(" -> evicting {:?}", bundle); + self.evict_bundle(bundle); + self.stats.evict_bundle_count += 1; + } + } + + // A minimal bundle cannot be split. + if self.minimal_bundle(bundle) { + self.dump_state(); + } + debug_assert!(!self.minimal_bundle(bundle)); + + self.split_and_requeue_bundle( + bundle, + first_conflicting_bundle.unwrap_or(LiveBundleIndex::invalid()), + ); + } + + fn try_allocating_regs_for_spilled_bundles(&mut self) { + for i in 0..self.spilled_bundles.len() { + let bundle = self.spilled_bundles[i]; // don't borrow self + let any_vreg = self.vregs[self.ranges + [self.bundles[bundle.index()].first_range.index()] + .vreg + .index()] + .reg; + let class = any_vreg.class(); + let mut success = false; + self.stats.spill_bundle_reg_probes += 1; + let nregs = self.env.regs_by_class[class as u8 as usize].len(); + for i in 0..nregs { + let i = (i + bundle.index()) % nregs; + let preg = self.env.regs_by_class[class as u8 as usize][i]; // don't borrow self + let preg_idx = PRegIndex::new(preg.index()); + if let AllocRegResult::Allocated(_) = + self.try_to_allocate_bundle_to_reg(bundle, preg_idx) + { + self.stats.spill_bundle_reg_success += 1; + success = true; + break; + } + } + if !success { + log::debug!( + "spilling bundle {:?} to spillset bundle list {:?}", + bundle, + self.bundles[bundle.index()].spillset + ); + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .bundles + .push(bundle); + } + } + } + + fn spillslot_can_fit_spillset( + &mut self, + spillslot: SpillSlotIndex, + spillset: SpillSetIndex, + ) -> bool { + for &bundle in &self.spillsets[spillset.index()].bundles { + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + let range = self.ranges[iter.index()].range; + if self.spillslots[spillslot.index()] + .ranges + .btree + .contains_key(&LiveRangeKey::from_range(&range)) + { + return false; + } + iter = self.ranges[iter.index()].next_in_bundle; + } + } + true + } + + fn allocate_spillset_to_spillslot( + &mut self, + spillset: SpillSetIndex, + spillslot: SpillSlotIndex, + ) { + self.spillsets[spillset.index()].slot = spillslot; + for i in 0..self.spillsets[spillset.index()].bundles.len() { + // don't borrow self + let bundle = self.spillsets[spillset.index()].bundles[i]; + log::debug!( + "spillslot {:?} alloc'ed to spillset {:?}: bundle {:?}", + spillslot, + spillset, + bundle + ); + let mut iter = self.bundles[bundle.index()].first_range; + while iter.is_valid() { + 
log::debug!( + "spillslot {:?} getting range {:?} from bundle {:?}: {:?}", + spillslot, + iter, + bundle, + self.ranges[iter.index()].range + ); + let range = self.ranges[iter.index()].range; + self.spillslots[spillslot.index()] + .ranges + .btree + .insert(LiveRangeKey::from_range(&range), iter); + iter = self.ranges[iter.index()].next_in_bundle; + } + } + } + + fn allocate_spillslots(&mut self) { + for spillset in 0..self.spillsets.len() { + log::debug!("allocate spillslot: {}", spillset); + let spillset = SpillSetIndex::new(spillset); + if self.spillsets[spillset.index()].bundles.is_empty() { + continue; + } + // Get or create the spillslot list for this size. + let size = self.spillsets[spillset.index()].size as usize; + if size >= self.slots_by_size.len() { + self.slots_by_size.resize( + size + 1, + SpillSlotList { + first_spillslot: SpillSlotIndex::invalid(), + last_spillslot: SpillSlotIndex::invalid(), + }, + ); + } + // Try a few existing spillslots. + let mut spillslot_iter = self.slots_by_size[size].first_spillslot; + let mut first_slot = SpillSlotIndex::invalid(); + let mut prev = SpillSlotIndex::invalid(); + let mut success = false; + for _attempt in 0..10 { + if spillslot_iter.is_invalid() { + break; + } + if spillslot_iter == first_slot { + // We've started looking at slots we placed at the end; end search. + break; + } + if first_slot.is_invalid() { + first_slot = spillslot_iter; + } + + if self.spillslot_can_fit_spillset(spillslot_iter, spillset) { + self.allocate_spillset_to_spillslot(spillset, spillslot_iter); + success = true; + break; + } + // Remove the slot and place it at the end of the respective list. + let next = self.spillslots[spillslot_iter.index()].next_spillslot; + if prev.is_valid() { + self.spillslots[prev.index()].next_spillslot = next; + } else { + self.slots_by_size[size].first_spillslot = next; + } + if !next.is_valid() { + self.slots_by_size[size].last_spillslot = prev; + } + + let last = self.slots_by_size[size].last_spillslot; + if last.is_valid() { + self.spillslots[last.index()].next_spillslot = spillslot_iter; + } else { + self.slots_by_size[size].first_spillslot = spillslot_iter; + } + self.slots_by_size[size].last_spillslot = spillslot_iter; + + prev = spillslot_iter; + spillslot_iter = next; + } + + if !success { + // Allocate a new spillslot. + let spillslot = SpillSlotIndex::new(self.spillslots.len()); + let next = self.slots_by_size[size].first_spillslot; + self.spillslots.push(SpillSlotData { + ranges: LiveRangeSet::new(), + next_spillslot: next, + size: size as u32, + alloc: Allocation::none(), + class: self.spillsets[spillset.index()].class, + }); + self.slots_by_size[size].first_spillslot = spillslot; + if !next.is_valid() { + self.slots_by_size[size].last_spillslot = spillslot; + } + + self.allocate_spillset_to_spillslot(spillset, spillslot); + } + } + + // Assign actual slot indices to spillslots. + let mut offset: u32 = 0; + for data in &mut self.spillslots { + // Align up to `size`. 
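// The next two lines round `offset` up to a multiple of `data.size`, which is
// why the power-of-two assertion matters: adding `size - 1` and then masking
// off the low bits is the usual bit trick. The same computation as a
// free-standing sketch (illustrative only, not used by the allocator):
fn align_up(offset: u32, size: u32) -> u32 {
    debug_assert!(size.is_power_of_two());
    (offset + size - 1) & !(size - 1)
}
// For example: align_up(13, 4) == 16, align_up(16, 4) == 16, align_up(0, 8) == 0.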
+ debug_assert!(data.size.is_power_of_two()); + offset = (offset + data.size - 1) & !(data.size - 1); + let slot = if self.func.multi_spillslot_named_by_last_slot() { + offset + data.size - 1 + } else { + offset + }; + data.alloc = Allocation::stack(SpillSlot::new(slot as usize, data.class)); + offset += data.size; + } + self.num_spillslots = offset; + + log::debug!("spillslot allocator done"); + } + + fn is_start_of_block(&self, pos: ProgPoint) -> bool { + let block = self.cfginfo.insn_block[pos.inst.index()]; + pos == self.cfginfo.block_entry[block.index()] + } + fn is_end_of_block(&self, pos: ProgPoint) -> bool { + let block = self.cfginfo.insn_block[pos.inst.index()]; + pos == self.cfginfo.block_exit[block.index()] + } + + fn insert_move( + &mut self, + pos: ProgPoint, + prio: InsertMovePrio, + from_alloc: Allocation, + to_alloc: Allocation, + ) { + debug!( + "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?}", + pos, prio, from_alloc, to_alloc + ); + self.inserted_moves.push(InsertedMove { + pos, + prio, + from_alloc, + to_alloc, + }); + } + + fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation { + let inst_allocs = &self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; + inst_allocs[slot] + } + + fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) { + let inst_allocs = &mut self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; + inst_allocs[slot] = alloc; + } + + fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation { + let bundledata = &self.bundles[self.ranges[range.index()].bundle.index()]; + if bundledata.allocation != Allocation::none() { + bundledata.allocation + } else { + self.spillslots[self.spillsets[bundledata.spillset.index()].slot.index()].alloc + } + } + + fn apply_allocations_and_insert_moves(&mut self) { + log::debug!("blockparam_ins: {:?}", self.blockparam_ins); + log::debug!("blockparam_outs: {:?}", self.blockparam_outs); + + /// We create "half-moves" in order to allow a single-scan + /// strategy with a subsequent sort. Basically, the key idea + /// is that as our single scan through a range for a vreg hits + /// upon the source or destination of an edge-move, we emit a + /// "half-move". These half-moves are carefully keyed in a + /// particular sort order (the field order below is + /// significant!) so that all half-moves on a given (from, to) + /// block-edge appear contiguously, and then all moves from a + /// given vreg appear contiguously. Within a given from-vreg, + /// pick the first `Source` (there should only be one, but + /// imprecision in liveranges due to loop handling sometimes + /// means that a blockparam-out is also recognized as a normal-out), + /// and then for each `Dest`, copy the source-alloc to that + /// dest-alloc. 
+ #[derive(Clone, Debug, PartialEq, Eq)] + struct HalfMove { + key: u64, + alloc: Allocation, + } + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] + #[repr(u8)] + enum HalfMoveKind { + Source = 0, + Dest = 1, + } + fn half_move_key( + from_block: Block, + to_block: Block, + to_vreg: VRegIndex, + kind: HalfMoveKind, + ) -> u64 { + assert!(from_block.index() < 1 << 21); + assert!(to_block.index() < 1 << 21); + assert!(to_vreg.index() < 1 << 21); + ((from_block.index() as u64) << 43) + | ((to_block.index() as u64) << 22) + | ((to_vreg.index() as u64) << 1) + | (kind as u8 as u64) + } + impl HalfMove { + fn from_block(&self) -> Block { + Block::new(((self.key >> 43) & ((1 << 21) - 1)) as usize) + } + fn to_block(&self) -> Block { + Block::new(((self.key >> 22) & ((1 << 21) - 1)) as usize) + } + fn to_vreg(&self) -> VRegIndex { + VRegIndex::new(((self.key >> 1) & ((1 << 21) - 1)) as usize) + } + fn kind(&self) -> HalfMoveKind { + if self.key & 1 == 1 { + HalfMoveKind::Dest + } else { + HalfMoveKind::Source + } + } + } + + let mut half_moves: Vec = vec![]; + + let mut reuse_input_insts = vec![]; + + let mut blockparam_in_idx = 0; + let mut blockparam_out_idx = 0; + for vreg in 0..self.vregs.len() { + let vreg = VRegIndex::new(vreg); + let defidx = self.vregs[vreg.index()].def; + let defining_block = if defidx.is_valid() { + self.cfginfo.insn_block[self.defs[defidx.index()].pos.inst.index()] + } else if self.vregs[vreg.index()].blockparam.is_valid() { + self.vregs[vreg.index()].blockparam + } else { + Block::invalid() + }; + + // For each range in each vreg, insert moves or + // half-moves. We also scan over `blockparam_ins` and + // `blockparam_outs`, which are sorted by (block, vreg). + let mut iter = self.vregs[vreg.index()].first_range; + let mut prev = LiveRangeIndex::invalid(); + while iter.is_valid() { + let alloc = self.get_alloc_for_range(iter); + let range = self.ranges[iter.index()].range; + log::debug!( + "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}", + vreg, + iter, + range, + alloc + ); + debug_assert!(alloc != Allocation::none()); + + if log::log_enabled!(log::Level::Debug) { + self.annotate( + range.from, + format!( + " <<< start v{} in {} (LR {})", + vreg.index(), + alloc, + iter.index() + ), + ); + self.annotate( + range.to, + format!( + " end v{} in {} (LR {}) >>>", + vreg.index(), + alloc, + iter.index() + ), + ); + } + + // Does this range follow immediately after a prior + // range in the same block? If so, insert a move (if + // the allocs differ). We do this directly rather than + // with half-moves because we eagerly know both sides + // already (and also, half-moves are specific to + // inter-block transfers). + // + // Note that we do *not* do this if there is also a + // def exactly at `range.from`: it's possible that an + // old liverange covers the Before pos of an inst, a + // new liverange covers the After pos, and the def + // also happens at After. In this case we don't want + // to an insert a move after the instruction copying + // the old liverange. + // + // Note also that we assert that the new range has to + // start at the Before-point of an instruction; we + // can't insert a move that logically happens just + // before After (i.e. in the middle of a single + // instruction). 
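// A quick round-trip through the 64-bit half-move key defined above, to make
// the bit layout concrete: the kind sits in bit 0, `to_vreg` in bits 1..22,
// `to_block` in bits 22..43, and `from_block` in bits 43..64 (21 bits each).
// Illustrative only, and it assumes the local `HalfMove` helpers above are in
// scope:
fn half_move_key_roundtrip() {
    let key = half_move_key(
        Block::new(5),
        Block::new(9),
        VRegIndex::new(1234),
        HalfMoveKind::Dest,
    );
    let hm = HalfMove { key, alloc: Allocation::none() };
    assert_eq!(hm.from_block(), Block::new(5));
    assert_eq!(hm.to_block(), Block::new(9));
    assert_eq!(hm.to_vreg(), VRegIndex::new(1234));
    assert_eq!(hm.kind(), HalfMoveKind::Dest);
}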
+ if prev.is_valid() { + let prev_alloc = self.get_alloc_for_range(prev); + let prev_range = self.ranges[prev.index()].range; + let def_idx = self.ranges[iter.index()].def; + let def_pos = if def_idx.is_valid() { + Some(self.defs[def_idx.index()].pos) + } else { + None + }; + debug_assert!(prev_alloc != Allocation::none()); + if prev_range.to == range.from + && !self.is_start_of_block(range.from) + && def_pos != Some(range.from) + { + log::debug!( + "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", + prev.index(), + iter.index(), + prev_alloc, + alloc, + vreg.index() + ); + assert_eq!(range.from.pos, InstPosition::Before); + self.insert_move(range.from, InsertMovePrio::Regular, prev_alloc, alloc); + } + } + + // Scan over blocks whose ends are covered by this + // range. For each, for each successor that is not + // already in this range (hence guaranteed to have the + // same allocation) and if the vreg is live, add a + // Source half-move. + let mut block = self.cfginfo.insn_block[range.from.inst.index()]; + while block.is_valid() && block.index() < self.func.blocks() { + if range.to < self.cfginfo.block_exit[block.index()].next() { + break; + } + log::debug!("examining block with end in range: block{}", block.index()); + for &succ in self.func.block_succs(block) { + log::debug!( + " -> has succ block {} with entry {:?}", + succ.index(), + self.cfginfo.block_entry[succ.index()] + ); + if range.contains_point(self.cfginfo.block_entry[succ.index()]) { + continue; + } + log::debug!(" -> out of this range, requires half-move if live"); + if self.liveins[succ.index()].get(vreg.index()) { + log::debug!(" -> live at input to succ, adding halfmove"); + half_moves.push(HalfMove { + key: half_move_key(block, succ, vreg, HalfMoveKind::Source), + alloc, + }); + } + } + + // Scan forward in `blockparam_outs`, adding all + // half-moves for outgoing values to blockparams + // in succs. + log::debug!( + "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}", + vreg.index(), + block.index(), + blockparam_out_idx, + ); + while blockparam_out_idx < self.blockparam_outs.len() { + let (from_vreg, from_block, to_block, to_vreg) = + self.blockparam_outs[blockparam_out_idx]; + if (from_vreg, from_block) > (vreg, block) { + break; + } + if (from_vreg, from_block) == (vreg, block) { + log::debug!( + " -> found: from v{} block{} to v{} block{}", + from_vreg.index(), + from_block.index(), + to_vreg.index(), + to_vreg.index() + ); + half_moves.push(HalfMove { + key: half_move_key( + from_block, + to_block, + to_vreg, + HalfMoveKind::Source, + ), + alloc, + }); + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.cfginfo.block_exit[block.index()], + format!( + "blockparam-out: block{} to block{}: v{} to v{} in {}", + from_block.index(), + to_block.index(), + from_vreg.index(), + to_vreg.index(), + alloc + ), + ); + } + } + blockparam_out_idx += 1; + } + + block = block.next(); + } + + // Scan over blocks whose beginnings are covered by + // this range and for which the vreg is live at the + // start of the block, and for which the def of the + // vreg is not in this block. For each, for each + // predecessor, add a Dest half-move. + // + // N.B.: why "def of this vreg is not in this block"? 
+ // Because live-range computation can over-approximate + // (due to the way that we handle loops in a single + // pass), especially if the program has irreducible + // control flow and/or if blocks are not in RPO, it + // may be the case that (i) the vreg is not *actually* + // live into this block, but is *defined* in this + // block. If the value is defined in this block, + // because this is SSA, the value cannot be used + // before the def and so we are not concerned about + // any incoming allocation for it. + let mut block = self.cfginfo.insn_block[range.from.inst.index()]; + if self.cfginfo.block_entry[block.index()] < range.from { + block = block.next(); + } + while block.is_valid() && block.index() < self.func.blocks() { + if self.cfginfo.block_entry[block.index()] >= range.to { + break; + } + + // Add half-moves for blockparam inputs. + log::debug!( + "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}", + vreg.index(), + block.index(), + blockparam_in_idx + ); + while blockparam_in_idx < self.blockparam_ins.len() { + let (to_vreg, to_block, from_block) = + self.blockparam_ins[blockparam_in_idx]; + if (to_vreg, to_block) > (vreg, block) { + break; + } + if (to_vreg, to_block) == (vreg, block) { + half_moves.push(HalfMove { + key: half_move_key( + from_block, + to_block, + to_vreg, + HalfMoveKind::Dest, + ), + alloc, + }); + log::debug!( + "match: blockparam_in: v{} in block{} from block{} into {}", + to_vreg.index(), + to_block.index(), + from_block.index(), + alloc, + ); + if log::log_enabled!(log::Level::Debug) { + self.annotate( + self.cfginfo.block_entry[block.index()], + format!( + "blockparam-in: block{} to block{}:into v{} in {}", + from_block.index(), + to_block.index(), + to_vreg.index(), + alloc + ), + ); + } + } + blockparam_in_idx += 1; + } + + // The below (range incoming into block) must be + // skipped if the def is in this block, as noted + // above. + if block == defining_block || !self.liveins[block.index()].get(vreg.index()) { + block = block.next(); + continue; + } + + log::debug!( + "scanning preds at vreg {} block {} for ends outside the range", + vreg.index(), + block.index() + ); + + // Now find any preds whose ends are not in the + // same range, and insert appropriate moves. + for &pred in self.func.block_preds(block) { + log::debug!( + "pred block {} has exit {:?}", + pred.index(), + self.cfginfo.block_exit[pred.index()] + ); + if range.contains_point(self.cfginfo.block_exit[pred.index()]) { + continue; + } + log::debug!(" -> requires half-move"); + half_moves.push(HalfMove { + key: half_move_key(pred, block, vreg, HalfMoveKind::Dest), + alloc, + }); + } + + block = block.next(); + } + + // If this is a blockparam vreg and the start of block + // is in this range, add to blockparam_allocs. + let (blockparam_block, blockparam_idx) = + self.cfginfo.vreg_def_blockparam[vreg.index()]; + if blockparam_block.is_valid() + && range.contains_point(self.cfginfo.block_entry[blockparam_block.index()]) + { + self.blockparam_allocs + .push((blockparam_block, blockparam_idx, vreg, alloc)); + } + + // Scan over def/uses and apply allocations. 
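// The `blockparam_ins` / `blockparam_outs` scans above are a merge-join: the
// outer iteration visits (vreg, block) keys in increasing order, and a cursor
// into the sorted side list only ever moves forward, so each list is walked
// once in total. The same pattern over plain integers (illustrative only):
fn merge_join(outer_keys: &[(u32, u32)], sorted_side: &[(u32, u32, &str)]) {
    let mut idx = 0;
    for &key in outer_keys {
        while idx < sorted_side.len() {
            let (a, b, payload) = sorted_side[idx];
            if (a, b) > key {
                break; // everything remaining belongs to a later outer key
            }
            if (a, b) == key {
                println!("matched {:?}: {}", key, payload);
            }
            idx += 1;
        }
    }
}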
+ if self.ranges[iter.index()].def.is_valid() { + let defdata = &self.defs[self.ranges[iter.index()].def.index()]; + debug_assert!(range.contains_point(defdata.pos)); + let operand = defdata.operand; + let inst = defdata.pos.inst; + let slot = defdata.slot; + self.set_alloc(inst, slot, alloc); + if let OperandPolicy::Reuse(_) = operand.policy() { + reuse_input_insts.push(inst); + } + } + let mut use_iter = self.ranges[iter.index()].first_use; + while use_iter.is_valid() { + let usedata = &self.uses[use_iter.index()]; + debug_assert!(range.contains_point(usedata.pos)); + let inst = usedata.pos.inst; + let slot = usedata.slot; + self.set_alloc(inst, slot, alloc); + use_iter = self.uses[use_iter.index()].next_use; + } + + prev = iter; + iter = self.ranges[iter.index()].next_in_reg; + } + } + + // Sort the half-moves list. For each (from, to, + // from-vreg) tuple, find the from-alloc and all the + // to-allocs, and insert moves on the block edge. + half_moves.sort_by_key(|h| h.key); + log::debug!("halfmoves: {:?}", half_moves); + self.stats.halfmoves_count = half_moves.len(); + + let mut i = 0; + while i < half_moves.len() { + // Find a Source. + while i < half_moves.len() && half_moves[i].kind() != HalfMoveKind::Source { + i += 1; + } + if i >= half_moves.len() { + break; + } + let src = &half_moves[i]; + i += 1; + + // Find all Dests. + let dest_key = src.key | 1; + let first_dest = i; + while i < half_moves.len() && half_moves[i].key == dest_key { + i += 1; + } + let last_dest = i; + + log::debug!( + "halfmove match: src {:?} dests {:?}", + src, + &half_moves[first_dest..last_dest] + ); + + // Determine the ProgPoint where moves on this (from, to) + // edge should go: + // - If there is more than one in-edge to `to`, then + // `from` must have only one out-edge; moves go at tail of + // `from` just before last Branch/Ret. + // - Otherwise, there must be at most one in-edge to `to`, + // and moves go at start of `to`. + let from_last_insn = self.func.block_insns(src.from_block()).last(); + let to_first_insn = self.func.block_insns(src.to_block()).first(); + let from_is_ret = self.func.is_ret(from_last_insn); + let to_is_entry = self.func.entry_block() == src.to_block(); + let from_outs = + self.func.block_succs(src.from_block()).len() + if from_is_ret { 1 } else { 0 }; + let to_ins = + self.func.block_preds(src.to_block()).len() + if to_is_entry { 1 } else { 0 }; + + let (insertion_point, prio) = if to_ins > 1 && from_outs <= 1 { + ( + // N.B.: "after" the branch should be interpreted + // by the user as happening before the actual + // branching action, but after the branch reads + // all necessary inputs. It's necessary to do this + // rather than to place the moves before the + // branch because the branch may have other + // actions than just the control-flow transfer, + // and these other actions may require other + // inputs (which should be read before the "edge" + // moves). + // + // Edits will only appear after the last (branch) + // instruction if the block has only a single + // successor; we do not expect the user to somehow + // duplicate or predicate these. 
+ ProgPoint::after(from_last_insn), + InsertMovePrio::OutEdgeMoves, + ) + } else if to_ins <= 1 { + ( + ProgPoint::before(to_first_insn), + InsertMovePrio::InEdgeMoves, + ) + } else { + panic!( + "Critical edge: can't insert moves between blocks {:?} and {:?}", + src.from_block(), + src.to_block() + ); + }; + + let mut last = None; + for dest in first_dest..last_dest { + let dest = &half_moves[dest]; + debug_assert!(last != Some(dest.alloc)); + self.insert_move(insertion_point, prio, src.alloc, dest.alloc); + last = Some(dest.alloc); + } + } + + // Handle multi-fixed-reg constraints by copying. + for (progpoint, from_preg, to_preg) in + std::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) + { + log::debug!( + "multi-fixed-move constraint at {:?} from p{} to p{}", + progpoint, + from_preg.index(), + to_preg.index() + ); + self.insert_move( + progpoint, + InsertMovePrio::MultiFixedReg, + Allocation::reg(self.pregs[from_preg.index()].reg), + Allocation::reg(self.pregs[to_preg.index()].reg), + ); + } + + // Handle outputs that reuse inputs: copy beforehand, then set + // input's alloc to output's. + // + // Note that the output's allocation may not *actually* be + // valid until InstPosition::After, but the reused input may + // occur at InstPosition::Before. This may appear incorrect, + // but we make it work by ensuring that all *other* inputs are + // extended to InstPosition::After so that the def will not + // interfere. (The liveness computation code does this -- we + // do not require the user to do so.) + // + // One might ask: why not insist that input-reusing defs occur + // at InstPosition::Before? this would be correct, but would + // mean that the reused input and the reusing output + // interfere, *guaranteeing* that every such case would + // require a move. This is really bad on ISAs (like x86) where + // reused inputs are ubiquitous. + // + // Another approach might be to put the def at Before, and + // trim the reused input's liverange back to the previous + // instruction's After. This is kind of OK until (i) a block + // boundary occurs between the prior inst and this one, or + // (ii) any moves/spills/reloads occur between the two + // instructions. We really do need the input to be live at + // this inst's Before. + // + // In principle what we really need is a "BeforeBefore" + // program point, but we don't want to introduce that + // everywhere and pay the cost of twice as many ProgPoints + // throughout the allocator. + // + // Or we could introduce a separate move instruction -- this + // is the approach that regalloc.rs takes with "mod" operands + // -- but that is also costly. + // + // So we take this approach (invented by IonMonkey -- somewhat + // hard to discern, though see [0] for a comment that makes + // this slightly less unclear) to avoid interference between + // the actual reused input and reusing output, ensure + // interference (hence no incorrectness) between other inputs + // and the reusing output, and not require a separate explicit + // move instruction. 
+ // + // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110 + for inst in reuse_input_insts { + let mut input_reused: SmallVec<[usize; 4]> = smallvec![]; + for output_idx in 0..self.func.inst_operands(inst).len() { + let operand = self.func.inst_operands(inst)[output_idx]; + if let OperandPolicy::Reuse(input_idx) = operand.policy() { + debug_assert!(!input_reused.contains(&input_idx)); + debug_assert_eq!(operand.pos(), OperandPos::After); + input_reused.push(input_idx); + let input_alloc = self.get_alloc(inst, input_idx); + let output_alloc = self.get_alloc(inst, output_idx); + log::debug!( + "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}", + inst, + output_idx, + output_alloc, + input_idx, + input_alloc + ); + if input_alloc != output_alloc { + if log::log_enabled!(log::Level::Debug) { + self.annotate( + ProgPoint::before(inst), + format!(" reuse-input-copy: {} -> {}", input_alloc, output_alloc), + ); + } + self.insert_move( + ProgPoint::before(inst), + InsertMovePrio::ReusedInput, + input_alloc, + output_alloc, + ); + self.set_alloc(inst, input_idx, output_alloc); + } + } + } + } + } + + fn resolve_inserted_moves(&mut self) { + // For each program point, gather all moves together. Then + // resolve (see cases below). + let mut i = 0; + self.inserted_moves + .sort_by_key(|m| (m.pos.to_index(), m.prio)); + while i < self.inserted_moves.len() { + let start = i; + let pos = self.inserted_moves[i].pos; + let prio = self.inserted_moves[i].prio; + while i < self.inserted_moves.len() + && self.inserted_moves[i].pos == pos + && self.inserted_moves[i].prio == prio + { + i += 1; + } + let moves = &self.inserted_moves[start..i]; + + // Get the regclass from one of the moves. + let regclass = moves[0].from_alloc.class(); + + // All moves in `moves` semantically happen in + // parallel. Let's resolve these to a sequence of moves + // that can be done one at a time. + let mut parallel_moves = ParallelMoves::new(Allocation::reg( + self.env.scratch_by_class[regclass as u8 as usize], + )); + log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio); + for m in moves { + if m.from_alloc != m.to_alloc { + log::debug!(" {} -> {}", m.from_alloc, m.to_alloc,); + parallel_moves.add(m.from_alloc, m.to_alloc); + } + } + + let resolved = parallel_moves.resolve(); + + for (src, dst) in resolved { + log::debug!(" resolved: {} -> {}", src, dst); + self.add_edit(pos, prio, Edit::Move { from: src, to: dst }); + } + } + + // Add edits to describe blockparam locations too. This is + // required by the checker. This comes after any edge-moves. 
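// A hedged example of what one of these blockparam edits looks like once the
// grouping loop below has run (the concrete vregs and allocations are made
// up): for a block whose two parameters v7 and v9 ended up in p0i and stack0,
// the edit recorded at that block's entry point is equivalent to:
fn example_blockparam_edit() -> Edit {
    Edit::BlockParams {
        vregs: vec![VReg::new(7, RegClass::Int), VReg::new(9, RegClass::Int)],
        allocs: vec![
            Allocation::reg(PReg::new(0, RegClass::Int)),
            Allocation::stack(SpillSlot::new(0, RegClass::Int)),
        ],
    }
}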
+ self.blockparam_allocs + .sort_by_key(|&(block, idx, _, _)| (block, idx)); + self.stats.blockparam_allocs_count = self.blockparam_allocs.len(); + let mut i = 0; + while i < self.blockparam_allocs.len() { + let start = i; + let block = self.blockparam_allocs[i].0; + while i < self.blockparam_allocs.len() && self.blockparam_allocs[i].0 == block { + i += 1; + } + let params = &self.blockparam_allocs[start..i]; + let vregs = params + .iter() + .map(|(_, _, vreg_idx, _)| self.vregs[vreg_idx.index()].reg) + .collect::>(); + let allocs = params + .iter() + .map(|(_, _, _, alloc)| *alloc) + .collect::>(); + assert_eq!(vregs.len(), self.func.block_params(block).len()); + assert_eq!(allocs.len(), self.func.block_params(block).len()); + self.add_edit( + self.cfginfo.block_entry[block.index()], + InsertMovePrio::BlockParam, + Edit::BlockParams { vregs, allocs }, + ); + } + + // Ensure edits are in sorted ProgPoint order. + self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio)); + self.stats.edits_count = self.edits.len(); + + // Add debug annotations. + if log::log_enabled!(log::Level::Debug) { + for i in 0..self.edits.len() { + let &(pos, _, ref edit) = &self.edits[i]; + match edit { + &Edit::Move { from, to } => { + self.annotate( + ProgPoint::from_index(pos), + format!("move {} -> {}", from, to), + ); + } + &Edit::BlockParams { + ref vregs, + ref allocs, + } => { + let s = format!("blockparams vregs:{:?} allocs:{:?}", vregs, allocs); + self.annotate(ProgPoint::from_index(pos), s); + } + } + } + } + } + + fn add_edit(&mut self, pos: ProgPoint, prio: InsertMovePrio, edit: Edit) { + match &edit { + &Edit::Move { from, to } if from == to => return, + _ => {} + } + + self.edits.push((pos.to_index(), prio, edit)); + } + + fn compute_stackmaps(&mut self) {} + + pub(crate) fn init(&mut self) -> Result<(), RegAllocError> { + self.create_pregs_and_vregs(); + self.compute_liveness(); + self.compute_hot_code(); + self.merge_vreg_bundles(); + self.queue_bundles(); + if log::log_enabled!(log::Level::Debug) { + self.dump_state(); + } + Ok(()) + } + + pub(crate) fn run(&mut self) -> Result<(), RegAllocError> { + self.process_bundles(); + self.try_allocating_regs_for_spilled_bundles(); + self.allocate_spillslots(); + self.apply_allocations_and_insert_moves(); + self.resolve_inserted_moves(); + self.compute_stackmaps(); + Ok(()) + } + + fn annotate(&mut self, progpoint: ProgPoint, s: String) { + if log::log_enabled!(log::Level::Debug) { + self.debug_annotations + .entry(progpoint) + .or_insert_with(|| vec![]) + .push(s); + } + } + + fn dump_results(&self) { + log::debug!("=== REGALLOC RESULTS ==="); + for block in 0..self.func.blocks() { + let block = Block::new(block); + log::debug!( + "block{}: [succs {:?} preds {:?}]", + block.index(), + self.func + .block_succs(block) + .iter() + .map(|b| b.index()) + .collect::>(), + self.func + .block_preds(block) + .iter() + .map(|b| b.index()) + .collect::>() + ); + for inst in self.func.block_insns(block).iter() { + for annotation in self + .debug_annotations + .get(&ProgPoint::before(inst)) + .map(|v| &v[..]) + .unwrap_or(&[]) + { + log::debug!(" inst{}-pre: {}", inst.index(), annotation); + } + let ops = self + .func + .inst_operands(inst) + .iter() + .map(|op| format!("{}", op)) + .collect::>(); + let clobbers = self + .func + .inst_clobbers(inst) + .iter() + .map(|preg| format!("{}", preg)) + .collect::>(); + let allocs = (0..ops.len()) + .map(|i| format!("{}", self.get_alloc(inst, i))) + .collect::>(); + let opname = if self.func.is_branch(inst) { + "br" + } else if 
self.func.is_call(inst) { + "call" + } else if self.func.is_ret(inst) { + "ret" + } else { + "op" + }; + let args = ops + .iter() + .zip(allocs.iter()) + .map(|(op, alloc)| format!("{} [{}]", op, alloc)) + .collect::>(); + let clobbers = if clobbers.is_empty() { + "".to_string() + } else { + format!(" [clobber: {}]", clobbers.join(", ")) + }; + log::debug!( + " inst{}: {} {}{}", + inst.index(), + opname, + args.join(", "), + clobbers + ); + for annotation in self + .debug_annotations + .get(&ProgPoint::after(inst)) + .map(|v| &v[..]) + .unwrap_or(&[]) + { + log::debug!(" inst{}-post: {}", inst.index(), annotation); + } + } + } + } +} + +pub fn run(func: &F, mach_env: &MachineEnv) -> Result { + let cfginfo = CFGInfo::new(func); + validate_ssa(func, &cfginfo)?; + + let mut env = Env::new(func, mach_env, cfginfo); + env.init()?; + + env.run()?; + + if log::log_enabled!(log::Level::Debug) { + env.dump_results(); + } + + Ok(Output { + edits: env + .edits + .into_iter() + .map(|(pos, _, edit)| (ProgPoint::from_index(pos), edit)) + .collect(), + allocs: env.allocs, + inst_alloc_offsets: env.inst_alloc_offsets, + num_spillslots: env.num_spillslots as usize, + stats: env.stats, + }) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..0750a824 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,780 @@ +/* + * The fellowing license applies to this file, which derives many + * details (register and constraint definitions, for example) from the + * files `BacktrackingAllocator.h`, `BacktrackingAllocator.cpp`, + * `LIR.h`, and possibly definitions in other related files in + * `js/src/jit/`: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#![allow(dead_code)] + +pub mod bitvec; +pub mod cfg; +pub mod domtree; +pub mod ion; +pub mod moves; +pub mod postorder; +pub mod ssa; + +#[macro_use] +pub mod index; +pub use index::{Block, Inst, InstRange, InstRangeIter}; + +pub mod checker; +pub mod fuzzing; + +/// Register classes. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum RegClass { + Int = 0, + Float = 1, +} + +/// A physical register. Contains a physical register number and a class. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct PReg(u8, RegClass); + +impl PReg { + pub const MAX_BITS: usize = 5; + pub const MAX: usize = (1 << Self::MAX_BITS) - 1; + + /// Create a new PReg. The `hw_enc` range is 6 bits. + #[inline(always)] + pub fn new(hw_enc: usize, class: RegClass) -> Self { + assert!(hw_enc <= Self::MAX); + PReg(hw_enc as u8, class) + } + + /// The physical register number, as encoded by the ISA for the particular register class. + #[inline(always)] + pub fn hw_enc(self) -> usize { + self.0 as usize + } + + /// The register class. + #[inline(always)] + pub fn class(self) -> RegClass { + self.1 + } + + /// Get an index into the (not necessarily contiguous) index space of + /// all physical registers. Allows one to maintain an array of data for + /// all PRegs and index it efficiently. 
+ #[inline(always)] + pub fn index(self) -> usize { + ((self.1 as u8 as usize) << 6) | (self.0 as usize) + } + + #[inline(always)] + pub fn from_index(index: usize) -> Self { + let class = (index >> 6) & 1; + let class = match class { + 0 => RegClass::Int, + 1 => RegClass::Float, + _ => unreachable!(), + }; + let index = index & Self::MAX; + PReg::new(index, class) + } + + #[inline(always)] + pub fn invalid() -> Self { + PReg::new(Self::MAX, RegClass::Int) + } +} + +impl std::fmt::Debug for PReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "PReg(hw = {}, class = {:?}, index = {})", + self.hw_enc(), + self.class(), + self.index() + ) + } +} + +impl std::fmt::Display for PReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let class = match self.class() { + RegClass::Int => "i", + RegClass::Float => "f", + }; + write!(f, "p{}{}", self.hw_enc(), class) + } +} + +/// A virtual register. Contains a virtual register number and a class. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct VReg(u32); + +impl VReg { + pub const MAX_BITS: usize = 20; + pub const MAX: usize = (1 << Self::MAX_BITS) - 1; + + #[inline(always)] + pub fn new(virt_reg: usize, class: RegClass) -> Self { + assert!(virt_reg <= Self::MAX); + VReg(((virt_reg as u32) << 1) | (class as u8 as u32)) + } + + #[inline(always)] + pub fn vreg(self) -> usize { + (self.0 >> 1) as usize + } + + #[inline(always)] + pub fn class(self) -> RegClass { + match self.0 & 1 { + 0 => RegClass::Int, + 1 => RegClass::Float, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn invalid() -> Self { + VReg::new(Self::MAX, RegClass::Int) + } +} + +impl std::fmt::Debug for VReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "VReg(vreg = {}, class = {:?})", + self.vreg(), + self.class() + ) + } +} + +impl std::fmt::Display for VReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "v{}", self.vreg()) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SpillSlot(u32); + +impl SpillSlot { + #[inline(always)] + pub fn new(slot: usize, class: RegClass) -> Self { + assert!(slot < (1 << 24)); + SpillSlot((slot as u32) | (class as u8 as u32) << 24) + } + #[inline(always)] + pub fn index(self) -> usize { + (self.0 & 0x00ffffff) as usize + } + #[inline(always)] + pub fn class(self) -> RegClass { + match (self.0 >> 24) as u8 { + 0 => RegClass::Int, + 1 => RegClass::Float, + _ => unreachable!(), + } + } + #[inline(always)] + pub fn plus(self, offset: usize) -> Self { + SpillSlot::new(self.index() + offset, self.class()) + } +} + +impl std::fmt::Display for SpillSlot { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "stack{}", self.index()) + } +} + +/// An `Operand` encodes everything about a mention of a register in +/// an instruction: virtual register number, and any constraint/policy +/// that applies to the register at this program point. +/// +/// An Operand may be a use or def (this corresponds to `LUse` and +/// `LAllocation` in Ion). +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Operand { + /// Bit-pack into 31 bits. This allows a `Reg` to encode an + /// `Operand` or an `Allocation` in 32 bits. 
+ /// + /// op-or-alloc:1 pos:2 kind:1 policy:2 class:1 preg:5 vreg:20 + bits: u32, +} + +impl Operand { + #[inline(always)] + pub fn new(vreg: VReg, policy: OperandPolicy, kind: OperandKind, pos: OperandPos) -> Self { + let (preg_field, policy_field): (u32, u32) = match policy { + OperandPolicy::Any => (0, 0), + OperandPolicy::Reg => (0, 1), + OperandPolicy::FixedReg(preg) => { + assert_eq!(preg.class(), vreg.class()); + (preg.hw_enc() as u32, 2) + } + OperandPolicy::Reuse(which) => { + assert!(which <= PReg::MAX); + (which as u32, 3) + } + }; + let class_field = vreg.class() as u8 as u32; + let pos_field = pos as u8 as u32; + let kind_field = kind as u8 as u32; + Operand { + bits: vreg.vreg() as u32 + | (preg_field << 20) + | (class_field << 25) + | (policy_field << 26) + | (kind_field << 28) + | (pos_field << 29), + } + } + + #[inline(always)] + pub fn reg_use(vreg: VReg) -> Self { + Operand::new( + vreg, + OperandPolicy::Reg, + OperandKind::Use, + OperandPos::Before, + ) + } + #[inline(always)] + pub fn reg_use_at_end(vreg: VReg) -> Self { + Operand::new(vreg, OperandPolicy::Reg, OperandKind::Use, OperandPos::Both) + } + #[inline(always)] + pub fn reg_def(vreg: VReg) -> Self { + Operand::new( + vreg, + OperandPolicy::Reg, + OperandKind::Def, + OperandPos::After, + ) + } + #[inline(always)] + pub fn reg_def_at_start(vreg: VReg) -> Self { + Operand::new(vreg, OperandPolicy::Reg, OperandKind::Def, OperandPos::Both) + } + #[inline(always)] + pub fn reg_temp(vreg: VReg) -> Self { + Operand::new(vreg, OperandPolicy::Reg, OperandKind::Def, OperandPos::Both) + } + #[inline(always)] + pub fn reg_reuse_def(vreg: VReg, idx: usize) -> Self { + Operand::new( + vreg, + OperandPolicy::Reuse(idx), + OperandKind::Def, + OperandPos::Both, + ) + } + #[inline(always)] + pub fn reg_fixed_use(vreg: VReg, preg: PReg) -> Self { + Operand::new( + vreg, + OperandPolicy::FixedReg(preg), + OperandKind::Use, + OperandPos::Before, + ) + } + #[inline(always)] + pub fn reg_fixed_def(vreg: VReg, preg: PReg) -> Self { + Operand::new( + vreg, + OperandPolicy::FixedReg(preg), + OperandKind::Def, + OperandPos::After, + ) + } + + #[inline(always)] + pub fn vreg(self) -> VReg { + let vreg_idx = ((self.bits as usize) & VReg::MAX) as usize; + VReg::new(vreg_idx, self.class()) + } + + #[inline(always)] + pub fn class(self) -> RegClass { + let class_field = (self.bits >> 25) & 1; + match class_field { + 0 => RegClass::Int, + 1 => RegClass::Float, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn kind(self) -> OperandKind { + let kind_field = (self.bits >> 28) & 1; + match kind_field { + 0 => OperandKind::Def, + 1 => OperandKind::Use, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn pos(self) -> OperandPos { + let pos_field = (self.bits >> 29) & 3; + match pos_field { + 0 => OperandPos::Before, + 1 => OperandPos::After, + 2 => OperandPos::Both, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn policy(self) -> OperandPolicy { + let policy_field = (self.bits >> 26) & 3; + let preg_field = ((self.bits >> 20) as usize) & PReg::MAX; + match policy_field { + 0 => OperandPolicy::Any, + 1 => OperandPolicy::Reg, + 2 => OperandPolicy::FixedReg(PReg::new(preg_field, self.class())), + 3 => OperandPolicy::Reuse(preg_field), + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn bits(self) -> u32 { + self.bits + } + + #[inline(always)] + pub fn from_bits(bits: u32) -> Self { + Operand { bits } + } +} + +impl std::fmt::Debug for Operand { + fn fmt(&self, f: &mut std::fmt::Formatter) -> 
std::fmt::Result { + write!( + f, + "Operand(vreg = {:?}, class = {:?}, kind = {:?}, pos = {:?}, policy = {:?})", + self.vreg().vreg(), + self.class(), + self.kind(), + self.pos(), + self.policy() + ) + } +} + +impl std::fmt::Display for Operand { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "{:?}@{:?}: {} {}", + self.kind(), + self.pos(), + self.vreg(), + self.policy() + ) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandPolicy { + /// Any location is fine (register or stack slot). + Any, + /// Operand must be in a register. Register is read-only for Uses. + Reg, + /// Operand must be in a fixed register. + FixedReg(PReg), + /// On defs only: reuse a use's register. Which use is given by `preg` field. + Reuse(usize), +} + +impl std::fmt::Display for OperandPolicy { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Any => write!(f, "any"), + Self::Reg => write!(f, "reg"), + Self::FixedReg(preg) => write!(f, "fixed({})", preg), + Self::Reuse(idx) => write!(f, "reuse({})", idx), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandKind { + Def = 0, + Use = 1, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandPos { + Before = 0, + After = 1, + Both = 2, +} + +/// An Allocation represents the end result of regalloc for an +/// Operand. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Allocation { + /// Bit-pack in 31 bits: + /// + /// op-or-alloc:1 kind:2 index:29 + bits: u32, +} + +impl std::fmt::Debug for Allocation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "Allocation(kind = {:?}, index = {})", + self.kind(), + self.index() + ) + } +} + +impl std::fmt::Display for Allocation { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self.kind() { + AllocationKind::None => write!(f, "none"), + AllocationKind::Reg => write!(f, "{}", self.as_reg().unwrap()), + AllocationKind::Stack => write!(f, "{}", self.as_stack().unwrap()), + } + } +} + +impl Allocation { + #[inline(always)] + pub(crate) fn new(kind: AllocationKind, index: usize) -> Self { + Self { + bits: ((kind as u8 as u32) << 29) | (index as u32), + } + } + + #[inline(always)] + pub fn none() -> Allocation { + Allocation::new(AllocationKind::None, 0) + } + + #[inline(always)] + pub fn reg(preg: PReg) -> Allocation { + Allocation::new(AllocationKind::Reg, preg.index()) + } + + #[inline(always)] + pub fn stack(slot: SpillSlot) -> Allocation { + Allocation::new(AllocationKind::Stack, slot.0 as usize) + } + + #[inline(always)] + pub fn kind(self) -> AllocationKind { + match (self.bits >> 29) & 3 { + 0 => AllocationKind::None, + 1 => AllocationKind::Reg, + 2 => AllocationKind::Stack, + _ => unreachable!(), + } + } + + #[inline(always)] + pub fn index(self) -> usize { + (self.bits & ((1 << 29) - 1)) as usize + } + + #[inline(always)] + pub fn as_reg(self) -> Option { + if self.kind() == AllocationKind::Reg { + Some(PReg::from_index(self.index())) + } else { + None + } + } + + #[inline(always)] + pub fn as_stack(self) -> Option { + if self.kind() == AllocationKind::Stack { + Some(SpillSlot(self.index() as u32)) + } else { + None + } + } + + #[inline(always)] + pub fn bits(self) -> u32 { + self.bits + } + + #[inline(always)] + pub fn from_bits(bits: u32) -> Self { + Self { bits } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(u8)] +pub enum AllocationKind { + None = 0, + Reg = 1, 
+ Stack = 2, +} + +impl Allocation { + #[inline(always)] + pub fn class(self) -> RegClass { + match self.kind() { + AllocationKind::None => panic!("Allocation::None has no class"), + AllocationKind::Reg => self.as_reg().unwrap().class(), + AllocationKind::Stack => self.as_stack().unwrap().class(), + } + } +} + +/// A trait defined by the regalloc client to provide access to its +/// machine-instruction / CFG representation. +pub trait Function { + // ------------- + // CFG traversal + // ------------- + + /// How many instructions are there? + fn insts(&self) -> usize; + + /// How many blocks are there? + fn blocks(&self) -> usize; + + /// Get the index of the entry block. + fn entry_block(&self) -> Block; + + /// Provide the range of instruction indices contained in each block. + fn block_insns(&self, block: Block) -> InstRange; + + /// Get CFG successors for a given block. + fn block_succs(&self, block: Block) -> &[Block]; + + /// Get the CFG predecessors for a given block. + fn block_preds(&self, block: Block) -> &[Block]; + + /// Get the block parameters for a given block. + fn block_params(&self, block: Block) -> &[VReg]; + + /// Determine whether an instruction is a call instruction. This is used + /// only for splitting heuristics. + fn is_call(&self, insn: Inst) -> bool; + + /// Determine whether an instruction is a return instruction. + fn is_ret(&self, insn: Inst) -> bool; + + /// Determine whether an instruction is the end-of-block + /// branch. If so, its operands *must* be the block parameters for + /// each of its block's `block_succs` successor blocks, in order. + fn is_branch(&self, insn: Inst) -> bool; + + /// Determine whether an instruction is a safepoint and requires a stackmap. + fn is_safepoint(&self, insn: Inst) -> bool; + + /// Determine whether an instruction is a move; if so, return the + /// vregs for (src, dst). + fn is_move(&self, insn: Inst) -> Option<(VReg, VReg)>; + + // -------------------------- + // Instruction register slots + // -------------------------- + + /// Get the Operands for an instruction. + fn inst_operands(&self, insn: Inst) -> &[Operand]; + + /// Get the clobbers for an instruction. + fn inst_clobbers(&self, insn: Inst) -> &[PReg]; + + /// Get the precise number of `VReg` in use in this function, to allow + /// preallocating data structures. This number *must* be a correct + /// lower-bound, otherwise invalid index failures may happen; it is of + /// course better if it is exact. + fn num_vregs(&self) -> usize; + + // -------------- + // Spills/reloads + // -------------- + + /// How many logical spill slots does the given regclass require? E.g., on + /// a 64-bit machine, spill slots may nominally be 64-bit words, but a + /// 128-bit vector value will require two slots. The regalloc will always + /// align on this size. + /// + /// This passes the associated virtual register to the client as well, + /// because the way in which we spill a real register may depend on the + /// value that we are using it for. E.g., if a machine has V128 registers + /// but we also use them for F32 and F64 values, we may use a different + /// store-slot size and smaller-operand store/load instructions for an F64 + /// than for a true V128. + fn spillslot_size(&self, regclass: RegClass, for_vreg: VReg) -> usize; + + /// When providing a spillslot number for a multi-slot spillslot, + /// do we provide the first or the last? This is usually related + /// to which direction the stack grows and different clients may + /// have different preferences. 
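// A hedged sketch of how a client might answer these two spill-layout
// questions for a hypothetical 64-bit target whose Float class also carries
// 128-bit vector values (the widths and the policy are assumptions, not part
// of this trait):
//
//     fn spillslot_size(&self, regclass: RegClass, _for_vreg: VReg) -> usize {
//         match regclass {
//             RegClass::Int => 1,   // one 64-bit word
//             RegClass::Float => 2, // two words, so a V128 value fits
//         }
//     }
//
// With last-slot naming (the method below returning true), a two-slot spill
// placed at word offsets 6..8 is named as slot 7; with the default first-slot
// naming it is named as slot 6.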
+ fn multi_spillslot_named_by_last_slot(&self) -> bool { + false + } +} + +/// A position before or after an instruction. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(u8)] +pub enum InstPosition { + Before = 0, + After = 1, +} + +/// A program point: a single point before or after a given instruction. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ProgPoint { + pub inst: Inst, + pub pos: InstPosition, +} + +impl ProgPoint { + pub fn before(inst: Inst) -> Self { + Self { + inst, + pos: InstPosition::Before, + } + } + + pub fn after(inst: Inst) -> Self { + Self { + inst, + pos: InstPosition::After, + } + } + + pub fn next(self) -> ProgPoint { + match self.pos { + InstPosition::Before => ProgPoint { + inst: self.inst, + pos: InstPosition::After, + }, + InstPosition::After => ProgPoint { + inst: self.inst.next(), + pos: InstPosition::Before, + }, + } + } + + pub fn prev(self) -> ProgPoint { + match self.pos { + InstPosition::Before => ProgPoint { + inst: self.inst.prev(), + pos: InstPosition::After, + }, + InstPosition::After => ProgPoint { + inst: self.inst, + pos: InstPosition::Before, + }, + } + } + + pub fn to_index(self) -> u32 { + debug_assert!(self.inst.index() <= ((1 << 31) - 1)); + ((self.inst.index() as u32) << 1) | (self.pos as u8 as u32) + } + + pub fn from_index(index: u32) -> Self { + let inst = Inst::new((index >> 1) as usize); + let pos = match index & 1 { + 0 => InstPosition::Before, + 1 => InstPosition::After, + _ => unreachable!(), + }; + Self { inst, pos } + } +} + +/// An instruction to insert into the program to perform some data movement. +#[derive(Clone, Debug)] +pub enum Edit { + /// Move one allocation to another. Each allocation may be a + /// register or a stack slot (spillslot). + Move { from: Allocation, to: Allocation }, + /// Define blockparams' locations. Note that this is not typically + /// turned into machine code, but can be useful metadata (e.g. for + /// the checker). + BlockParams { + vregs: Vec, + allocs: Vec, + }, +} + +/// A machine envrionment tells the register allocator which registers +/// are available to allocate and what register may be used as a +/// scratch register for each class, and some other miscellaneous info +/// as well. +#[derive(Clone, Debug)] +pub struct MachineEnv { + regs: Vec, + regs_by_class: Vec>, + scratch_by_class: Vec, +} + +/// The output of the register allocator. +#[derive(Clone, Debug)] +pub struct Output { + /// How many spillslots are needed in the frame? + pub num_spillslots: usize, + /// Edits (insertions or removals). Guaranteed to be sorted by + /// program point. + pub edits: Vec<(ProgPoint, Edit)>, + /// Allocations for each operand. Mapping from instruction to + /// allocations provided by `inst_alloc_offsets` below. + pub allocs: Vec, + /// Allocation offset in `allocs` for each instruction. + pub inst_alloc_offsets: Vec, + + /// Internal stats from the allocator. + pub stats: ion::Stats, +} + +impl Output { + pub fn inst_allocs(&self, inst: Inst) -> &[Allocation] { + let start = self.inst_alloc_offsets[inst.index()] as usize; + let end = if inst.index() + 1 == self.inst_alloc_offsets.len() { + self.allocs.len() + } else { + self.inst_alloc_offsets[inst.index() + 1] as usize + }; + &self.allocs[start..end] + } +} + +/// An error that prevents allocation. +#[derive(Clone, Debug)] +pub enum RegAllocError { + /// Invalid SSA for given vreg at given inst: multiple defs or + /// illegal use. 
`inst` may be `Inst::invalid()` if this concerns + /// a block param. + SSA(VReg, Inst), + /// Invalid basic block: does not end in branch/ret, or contains a + /// branch/ret in the middle. + BB(Block), + /// Invalid branch: operand count does not match sum of block + /// params of successor blocks. + Branch(Inst), +} + +impl std::fmt::Display for RegAllocError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl std::error::Error for RegAllocError {} + +pub fn run(func: &F, env: &MachineEnv) -> Result { + ion::run(func, env) +} diff --git a/src/moves.rs b/src/moves.rs new file mode 100644 index 00000000..a5f70be2 --- /dev/null +++ b/src/moves.rs @@ -0,0 +1,199 @@ +use crate::Allocation; +use smallvec::{smallvec, SmallVec}; + +pub type MoveVec = SmallVec<[(Allocation, Allocation); 16]>; + +/// A `ParallelMoves` represents a list of alloc-to-alloc moves that +/// must happen in parallel -- i.e., all reads of sources semantically +/// happen before all writes of destinations, and destinations are +/// allowed to overwrite sources. It can compute a list of sequential +/// moves that will produce the equivalent data movement, possibly +/// using a scratch register if one is necessary. +pub struct ParallelMoves { + parallel_moves: MoveVec, + scratch: Allocation, +} + +impl ParallelMoves { + pub fn new(scratch: Allocation) -> Self { + Self { + parallel_moves: smallvec![], + scratch, + } + } + + pub fn add(&mut self, from: Allocation, to: Allocation) { + self.parallel_moves.push((from, to)); + } + + fn sources_overlap_dests(&self) -> bool { + // Assumes `parallel_moves` has already been sorted in `resolve()` below. + for &(_, dst) in &self.parallel_moves { + if self + .parallel_moves + .binary_search_by_key(&dst, |&(src, _)| src) + .is_ok() + { + return true; + } + } + false + } + + pub fn resolve(mut self) -> MoveVec { + // Easy case: zero or one move. Just return our vec. + if self.parallel_moves.len() <= 1 { + return self.parallel_moves; + } + + // Sort moves by source so that we can efficiently test for + // presence. + self.parallel_moves.sort(); + + // Do any dests overlap sources? If not, we can also just + // return the list. + if !self.sources_overlap_dests() { + return self.parallel_moves; + } + + // General case: some moves overwrite dests that other moves + // read as sources. We'll use a general algorithm. + // + // *Important property*: because we expect that each register + // has only one writer (otherwise the effect of the parallel + // move is undefined), each move can only block one other move + // (with its one source corresponding to the one writer of + // that source). Thus, we *can only have simple cycles*: there + // are no SCCs that are more complex than that. We leverage + // this fact below to avoid having to do a full Tarjan SCC DFS + // (with lowest-index computation, etc.): instead, as soon as + // we find a cycle, we know we have the full cycle and we can + // do a cyclic move sequence and continue. + + // Sort moves by destination and check that each destination + // has only one writer. + self.parallel_moves.sort_by_key(|&(_, dst)| dst); + if cfg!(debug) { + let mut last_dst = None; + for &(_, dst) in &self.parallel_moves { + if last_dst.is_some() { + assert!(last_dst.unwrap() != dst); + } + last_dst = Some(dst); + } + } + + // Construct a mapping from move indices to moves they must + // come before. 
Any given move must come before a move that + // overwrites its destination; we have moves sorted by dest + // above so we can efficiently find such a move, if any. + let mut must_come_before: SmallVec<[Option; 16]> = + smallvec![None; self.parallel_moves.len()]; + for (i, &(src, _)) in self.parallel_moves.iter().enumerate() { + if let Ok(move_to_dst_idx) = self + .parallel_moves + .binary_search_by_key(&src, |&(_, dst)| dst) + { + must_come_before[i] = Some(move_to_dst_idx); + } + } + + // Do a simple stack-based DFS and emit moves in postorder, + // then reverse at the end for RPO. Unlike Tarjan's SCC + // algorithm, we can emit a cycle as soon as we find one, as + // noted above. + let mut ret: MoveVec = smallvec![]; + let mut stack: SmallVec<[usize; 16]> = smallvec![]; + let mut visited: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()]; + let mut onstack: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()]; + + stack.push(0); + onstack[0] = true; + loop { + if stack.is_empty() { + if let Some(next) = visited.iter().position(|&flag| !flag) { + stack.push(next); + onstack[next] = true; + } else { + break; + } + } + + let top = *stack.last().unwrap(); + visited[top] = true; + match must_come_before[top] { + None => { + ret.push(self.parallel_moves[top]); + onstack[top] = false; + stack.pop(); + while let Some(top) = stack.pop() { + ret.push(self.parallel_moves[top]); + onstack[top] = false; + } + } + Some(next) if visited[next] && !onstack[next] => { + ret.push(self.parallel_moves[top]); + onstack[top] = false; + stack.pop(); + while let Some(top) = stack.pop() { + ret.push(self.parallel_moves[top]); + onstack[top] = false; + } + } + Some(next) if !visited[next] && !onstack[next] => { + stack.push(next); + onstack[next] = true; + continue; + } + Some(next) => { + // Found a cycle -- emit a cyclic-move sequence + // for the cycle on the top of stack, then normal + // moves below it. Recall that these moves will be + // reversed in sequence, so from the original + // parallel move set + // + // { B := A, C := B, A := B } + // + // we will generate something like: + // + // A := scratch + // B := A + // C := B + // scratch := C + // + // which will become: + // + // scratch := C + // C := B + // B := A + // A := scratch + let mut last_dst = None; + let mut scratch_src = None; + while let Some(move_idx) = stack.pop() { + onstack[move_idx] = false; + let (mut src, dst) = self.parallel_moves[move_idx]; + if last_dst.is_none() { + scratch_src = Some(src); + src = self.scratch; + } else { + assert_eq!(last_dst.unwrap(), src); + } + ret.push((src, dst)); + + last_dst = Some(dst); + + if move_idx == next { + break; + } + } + if let Some(src) = scratch_src { + ret.push((src, self.scratch)); + } + } + } + } + + ret.reverse(); + ret + } +} diff --git a/src/postorder.rs b/src/postorder.rs new file mode 100644 index 00000000..b5faf90b --- /dev/null +++ b/src/postorder.rs @@ -0,0 +1,51 @@ +//! Fast postorder computation with no allocations (aside from result). + +use crate::Block; +use smallvec::{smallvec, SmallVec}; + +pub fn calculate<'a, SuccFn: Fn(Block) -> &'a [Block]>( + num_blocks: usize, + entry: Block, + succ_blocks: SuccFn, +) -> Vec { + let mut ret = vec![]; + + // State: visited-block map, and explicit DFS stack. 
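// A usage sketch for the `ParallelMoves` resolver defined above in moves.rs
// (the allocations are made up; `scratch` stands in for the per-class scratch
// register the caller supplies):
fn parallel_swap_example(a: Allocation, b: Allocation, scratch: Allocation) {
    let mut moves = ParallelMoves::new(scratch);
    // A parallel swap: both reads semantically happen before both writes.
    moves.add(a, b);
    moves.add(b, a);
    // `resolve()` returns sequential moves with the same effect; for a swap
    // it has to route one side through the scratch allocation.
    for (from, to) in moves.resolve() {
        println!("move {} -> {}", from, to);
    }
}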
diff --git a/src/postorder.rs b/src/postorder.rs
new file mode 100644
index 00000000..b5faf90b
--- /dev/null
+++ b/src/postorder.rs
@@ -0,0 +1,51 @@
+//! Fast postorder computation with no allocations (aside from result).
+
+use crate::Block;
+use smallvec::{smallvec, SmallVec};
+
+pub fn calculate<'a, SuccFn: Fn(Block) -> &'a [Block]>(
+    num_blocks: usize,
+    entry: Block,
+    succ_blocks: SuccFn,
+) -> Vec<Block> {
+    let mut ret = vec![];
+
+    // State: visited-block map, and explicit DFS stack.
+    let mut visited = vec![];
+    visited.resize(num_blocks, false);
+
+    struct State<'a> {
+        block: Block,
+        succs: &'a [Block],
+        next_succ: usize,
+    }
+    let mut stack: SmallVec<[State; 64]> = smallvec![];
+
+    visited[entry.index()] = true;
+    stack.push(State {
+        block: entry,
+        succs: succ_blocks(entry),
+        next_succ: 0,
+    });
+
+    while let Some(ref mut state) = stack.last_mut() {
+        // Perform one action: push to new succ, skip an already-visited succ, or pop.
+        if state.next_succ < state.succs.len() {
+            let succ = state.succs[state.next_succ];
+            state.next_succ += 1;
+            if !visited[succ.index()] {
+                visited[succ.index()] = true;
+                stack.push(State {
+                    block: succ,
+                    succs: succ_blocks(succ),
+                    next_succ: 0,
+                });
+            }
+        } else {
+            ret.push(state.block);
+            stack.pop();
+        }
+    }
+
+    ret
+}
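
The traversal is a standard iterative DFS, so successors are emitted before their predecessors and the entry block comes out last. A small sketch of a test that could be appended to this file, using only `Block::new` and `Block::index` as elsewhere in this patch:

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn diamond_postorder() {
            // CFG: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {}.
            let succs: Vec<Vec<Block>> = vec![
                vec![Block::new(1), Block::new(2)],
                vec![Block::new(3)],
                vec![Block::new(3)],
                vec![],
            ];
            let post = calculate(4, Block::new(0), |b| &succs[b.index()][..]);

            // Deepest block first, entry block last.
            assert_eq!(
                post,
                vec![Block::new(3), Block::new(1), Block::new(2), Block::new(0)]
            );
        }
    }
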
diff --git a/src/ssa.rs b/src/ssa.rs
new file mode 100644
index 00000000..3b0ca143
--- /dev/null
+++ b/src/ssa.rs
@@ -0,0 +1,87 @@
+//! SSA-related utilities.
+
+use crate::cfg::CFGInfo;
+
+use crate::{Block, Function, Inst, OperandKind, RegAllocError};
+
+pub fn validate_ssa<F: Function>(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllocError> {
+    // Walk the blocks in arbitrary order. Check, for every use, that
+    // the def is either in the same block in an earlier inst, or is
+    // defined (by inst or blockparam) in some other block that
+    // dominates this one. Also check that for every block param and
+    // inst def, that this is the only def.
+    let mut defined = vec![false; f.num_vregs()];
+    for block in 0..f.blocks() {
+        let block = Block::new(block);
+        for blockparam in f.block_params(block) {
+            if defined[blockparam.vreg()] {
+                return Err(RegAllocError::SSA(*blockparam, Inst::invalid()));
+            }
+            defined[blockparam.vreg()] = true;
+        }
+        for iix in f.block_insns(block).iter() {
+            let operands = f.inst_operands(iix);
+            for operand in operands {
+                match operand.kind() {
+                    OperandKind::Use => {
+                        let def_block = if cfginfo.vreg_def_inst[operand.vreg().vreg()].is_valid() {
+                            cfginfo.insn_block[cfginfo.vreg_def_inst[operand.vreg().vreg()].index()]
+                        } else {
+                            cfginfo.vreg_def_blockparam[operand.vreg().vreg()].0
+                        };
+                        if def_block.is_invalid() {
+                            return Err(RegAllocError::SSA(operand.vreg(), iix));
+                        }
+                        if !cfginfo.dominates(def_block, block) {
+                            return Err(RegAllocError::SSA(operand.vreg(), iix));
+                        }
+                    }
+                    OperandKind::Def => {
+                        if defined[operand.vreg().vreg()] {
+                            return Err(RegAllocError::SSA(operand.vreg(), iix));
+                        }
+                        defined[operand.vreg().vreg()] = true;
+                    }
+                }
+            }
+        }
+    }
+
+    // Check that the length of branch args matches the sum of the
+    // number of blockparams in their succs, and that the end of every
+    // block ends in this branch or in a ret, and that there are no
+    // other branches or rets in the middle of the block.
+    for block in 0..f.blocks() {
+        let block = Block::new(block);
+        let insns = f.block_insns(block);
+        for insn in insns.iter() {
+            if insn == insns.last() {
+                if !(f.is_branch(insn) || f.is_ret(insn)) {
+                    return Err(RegAllocError::BB(block));
+                }
+                if f.is_branch(insn) {
+                    let expected = f
+                        .block_succs(block)
+                        .iter()
+                        .map(|&succ| f.block_params(succ).len())
+                        .sum();
+                    if f.inst_operands(insn).len() != expected {
+                        return Err(RegAllocError::Branch(insn));
+                    }
+                }
+            } else {
+                if f.is_branch(insn) || f.is_ret(insn) {
+                    return Err(RegAllocError::BB(block));
+                }
+            }
+        }
+    }
+
+    // Check that the entry block has no block args: otherwise it is
+    // undefined what their value would be.
+    if f.block_params(f.entry_block()).len() > 0 {
+        return Err(RegAllocError::BB(f.entry_block()));
+    }
+
+    Ok(())
+}
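
For completeness, a hedged sketch of how the allocator front end might drive this check; `CFGInfo::new` is an assumed constructor living in `src/cfg.rs`, outside this excerpt, and its exact signature may differ:

    // Hypothetical driver inside the crate; F: Function as in validate_ssa.
    fn prepare<F: Function>(f: &F) -> Result<CFGInfo, RegAllocError> {
        // Assumed constructor from src/cfg.rs: computes postorder, the
        // dominator tree, and the per-vreg def locations used above.
        let cfginfo = CFGInfo::new(f);
        // Reject multiple defs, uses not dominated by their def, blocks that
        // do not end in a branch/ret, and branches whose operand count does
        // not match their successors' blockparam counts.
        validate_ssa(f, &cfginfo)?;
        Ok(cfginfo)
    }
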