Refactor balance override detection (#3239)

# Description

With #3238 merged it now becomes apparent why we have such a poor quote verification ratio on Arbitrum: all bridged tokens store their `balances` mapping at storage slot 51.

# Changes

To avoid having to merge another PR every time we find an important token that requires a deeper probing depth, I made the probing depth controllable via a CLI parameter. While I was at it, I did the same for the token cache size (it should have been like that from the start, given that the change was trivial with the given architecture).

Making the probing depth runtime-dependent made using static variables a lot more annoying. Instead I created an `Inner` struct which contains all the static data, so it can now be initialized once at runtime without more exotic types like `LazyLock` or `OnceLock`. As long as we ensure that only one instance of `Detector` gets created, this is equivalent in terms of memory usage and performance.

I also refactored the logic that constructs the strategy list. It should now be dead simple to see what's going on and how to extend it. First we hardcode a list of known non-variable storage slots, and afterwards we probe the first N slots starting from known entry points. This is currently just the 0th slot but could theoretically be extended with other relevant slots. Even if it doesn't get extended, I think the refactor still makes the code easier to follow.

## How to test

e2e tests and unit tests still work. I also added another unit test for a bridged token on Arbitrum.
cowprotocol · Jan 17, 2025 · df6306c · df6306c
1 parent 395d605
commit df6306c
Show file tree

Hide file tree

Showing 2 changed files with 126 additions and 64 deletions.
diff --git a/crates/shared/src/price_estimation/trade_verifier/balance_overrides.rs b/crates/shared/src/price_estimation/trade_verifier/balance_overrides.rs
@@ -37,19 +37,32 @@ pub struct Arguments {
     /// will take precedence.
     #[clap(long, env, action = clap::ArgAction::Set, default_value_t)]
     pub quote_autodetect_token_balance_overrides: bool,
+
+    /// Controls how many storage slots get probed per storage entry point
+    /// for automatically detecting how to override the balances of a token.
+    #[clap(long, env, action = clap::ArgAction::Set, default_value = "60")]
+    pub quote_autodetect_token_balance_overrides_probing_depth: u8,
+
+    /// Controls for how many tokens we store the result of the automatic
+    /// balance override detection before evicting less used entries.
+    #[clap(long, env, action = clap::ArgAction::Set, default_value = "1000")]
+    pub quote_autodetect_token_balance_overrides_cache_size: usize,
 }
 
 impl Arguments {
-    const CACHE_SIZE: usize = 1000;
-
     /// Creates a balance overrides instance from the current configuration.
     pub fn init(&self, simulator: Arc<dyn CodeSimulating>) -> Arc<dyn BalanceOverriding> {
         Arc::new(BalanceOverrides {
             hardcoded: self.quote_token_balance_overrides.0.clone(),
             detector: self.quote_autodetect_token_balance_overrides.then(|| {
                 (
-                    Detector::new(simulator),
-                    Mutex::new(SizedCache::with_size(Self::CACHE_SIZE)),
+                    Detector::new(
+                        simulator,
+                        self.quote_autodetect_token_balance_overrides_probing_depth,
+                    ),
+                    Mutex::new(SizedCache::with_size(
+                        self.quote_autodetect_token_balance_overrides_cache_size,
+                    )),
                 )
             }),
         })
@@ -61,6 +74,8 @@ impl Display for Arguments {
         let Self {
             quote_token_balance_overrides,
             quote_autodetect_token_balance_overrides,
+            quote_autodetect_token_balance_overrides_probing_depth,
+            quote_autodetect_token_balance_overrides_cache_size,
         } = self;
 
         writeln!(
@@ -73,6 +88,16 @@ impl Display for Arguments {
             "quote_autodetect_token_balance_overrides: {:?}",
             quote_autodetect_token_balance_overrides
         )?;
+        writeln!(
+            f,
+            "quote_autodetect_token_balance_overrides_probing_depth: {:?}",
+            quote_autodetect_token_balance_overrides_probing_depth
+        )?;
+        writeln!(
+            f,
+            "quote_autodetect_token_balance_overrides_cache_size: {:?}",
+            quote_autodetect_token_balance_overrides_cache_size
+        )?;
 
         Ok(())
     }

diff --git a/crates/shared/src/price_estimation/trade_verifier/balance_overrides/detector.rs b/crates/shared/src/price_estimation/trade_verifier/balance_overrides/detector.rs
@@ -8,7 +8,7 @@ use {
     std::{
         collections::HashMap,
         fmt::{self, Debug, Formatter},
-        sync::{Arc, LazyLock},
+        sync::Arc,
     },
     thiserror::Error,
     web3::{signing::keccak256, types::CallRequest},
@@ -18,14 +18,82 @@ use {
 ///
 /// This has the exact same node requirements as trade verification.
 #[derive(Clone)]
-pub struct Detector {
+pub struct Detector(Arc<Inner>);
+
+pub struct Inner {
     simulator: Arc<dyn CodeSimulating>,
+    /// address that we try to override the balances for
+    holder: Address,
+    /// all strategies used to detect successful balance overrides
+    strategies: Vec<StrategyHelper>,
+    /// overrides for all tested strategies
+    state_overrides: HashMap<H256, H256>,
+}
+
+impl std::ops::Deref for Detector {
+    type Target = Inner;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
 }
 
 impl Detector {
     /// Creates a new balance override detector.
-    pub fn new(simulator: Arc<dyn CodeSimulating>) -> Self {
-        Self { simulator }
+    pub fn new(simulator: Arc<dyn CodeSimulating>, probing_depth: u8) -> Self {
+        let holder = {
+            // On a technical note, Ethereum public addresses are, for the most
+            // part, generated by taking the 20 last bytes of a Keccak-256 hash (for
+            // things like contract creation, public address derivation from a
+            // Secp256k1 public key, etc.), so we use one for our heuristics from a
+            // 32-byte digest with no know pre-image, to prevent any weird
+            // interactions with the weird tokens of the world.
+            let mut address = Address::default();
+            address.0.copy_from_slice(&keccak256(b"Moo!")[12..]);
+            address.0[19] = address.0[19].wrapping_sub(1);
+            address
+        };
+
+        let strategies: Vec<_> = {
+            // First test storage slots that don't need guesswork.
+            let mut strategies = vec![
+                Strategy::SoladyMapping,
+                Strategy::SolidityMapping {
+                    slot: U256::from(OPEN_ZEPPELIN_ERC20_UPGRADEABLE),
+                },
+            ];
+
+            // For each entry point probe the first n following slots.
+            let entry_points = [
+                // solc lays out memory linearly starting at 0 by default
+                "0000000000000000000000000000000000000000000000000000000000000000",
+            ];
+            for start_slot in entry_points {
+                let mut slot = U256::from(start_slot);
+                for _ in 0..probing_depth {
+                    strategies.push(Strategy::SolidityMapping { slot });
+                    slot += U256::one();
+                }
+            }
+
+            strategies
+                .into_iter()
+                .enumerate()
+                .map(|(index, strategy)| StrategyHelper::new(strategy, index))
+                .collect()
+        };
+
+        let state_overrides = strategies
+            .iter()
+            .map(|helper| helper.strategy.state_override(&holder, &helper.balance))
+            .collect::<HashMap<_, _>>();
+
+        Self(Arc::new(Inner {
+            simulator,
+            holder,
+            strategies,
+            state_overrides,
+        }))
     }
 
     /// Tries to detect the balance override strategy for the specified token.
@@ -35,12 +103,12 @@ impl Detector {
         let token = dummy_contract!(ERC20, token);
         let call = CallRequest {
             to: Some(token.address()),
-            data: token.methods().balance_of(*HOLDER).m.tx.data,
+            data: token.methods().balance_of(self.holder).m.tx.data,
             ..Default::default()
         };
         let overrides = hashmap! {
             token.address() => StateOverride {
-                state_diff: Some(STORAGE_OVERRIDES.clone()),
+                state_diff: Some(self.state_overrides.clone()),
                 ..Default::default()
             },
         };
@@ -50,7 +118,7 @@ impl Detector {
             .then(|| U256::from_big_endian(&output))
             .ok_or(DetectionError::Decode)?;
 
-        TESTED_STRATEGIES
+        self.strategies
             .iter()
             .find_map(|helper| (helper.balance == balance).then_some(helper.strategy.clone()))
             .ok_or(DetectionError::NotFound)
@@ -67,7 +135,8 @@ struct StrategyHelper {
 }
 
 impl StrategyHelper {
-    fn new(strategy: Strategy, index: u8) -> Self {
+    fn new(strategy: Strategy, index: usize) -> Self {
+        let index = u8::try_from(index).expect("unreasonable amount of strategies used");
         Self {
             strategy,
             // Use an exact value which isn't too large or too small. This helps
@@ -81,56 +150,10 @@ impl StrategyHelper {
     }
 }
 
-/// Storage slot based on OpenZeppelin's ERC20Upgradeable contract [^1].
-///
-/// [^1]: <https://github.com/OpenZeppelin/openzeppelin-contracts-upgradeable/blob/master/contracts/token/ERC20/ERC20Upgradeable.sol#L43-L44>
-static OPEN_ZEPPELIN_ERC20_UPGRADEABLE: &str =
+// <https://github.com/OpenZeppelin/openzeppelin-contracts-upgradeable/blob/master/contracts/token/ERC20/ERC20Upgradeable.sol#L43-L44>
+const OPEN_ZEPPELIN_ERC20_UPGRADEABLE: &str =
     "52c63247e1f47db19d5ce0460030c497f067ca4cebf71ba98eeadabe20bace00";
 
-/// Address which we try to override the balances for.
-static HOLDER: LazyLock<Address> = LazyLock::new(|| {
-    // On a technical note, Ethereum public addresses are, for the most
-    // part, generated by taking the 20 last bytes of a Keccak-256 hash (for
-    // things like contract creation, public address derivation from a
-    // Secp256k1 public key, etc.), so we use one for our heuristics from a
-    // 32-byte digest with no know pre-image, to prevent any weird
-    // interactions with the weird tokens of the world.
-    let mut address = Address::default();
-    address.0.copy_from_slice(&keccak256(b"Moo!")[12..]);
-    address.0[19] = address.0[19].wrapping_sub(1);
-    address
-});
-
-/// All the strategies we use to detect where a token stores the balances.
-static TESTED_STRATEGIES: LazyLock<Vec<StrategyHelper>> = LazyLock::new(|| {
-    const FIRST_N_SLOTS: u8 = 25;
-
-    // This is a pretty unsophisticated strategy where we basically try a
-    // bunch of different slots and see which one sticks. We try balance
-    // mappings for the first `TRIES` slots; each with a unique value.
-    (0..FIRST_N_SLOTS).map(|i| {
-        let strategy = Strategy::SolidityMapping { slot: U256::from(i) };
-        StrategyHelper::new(strategy, i)
-    })
-    // Afterwards we try hardcoded storage slots based on popular utility
-    // libraries like OpenZeppelin.
-    .chain((FIRST_N_SLOTS..).zip([
-        Strategy::SolidityMapping{ slot: U256::from(OPEN_ZEPPELIN_ERC20_UPGRADEABLE) },
-        Strategy::SoladyMapping,
-    ]).map(|(index, strategy)| {
-        StrategyHelper::new(strategy, index)
-    }))
-    .collect()
-});
-
-/// Storage overrides (storage_slot, value) for all tested strategies.
-static STORAGE_OVERRIDES: LazyLock<HashMap<H256, H256>> = LazyLock::new(|| {
-    TESTED_STRATEGIES
-        .iter()
-        .map(|helper| helper.strategy.state_override(&HOLDER, &helper.balance))
-        .collect::<HashMap<_, _>>()
-});
-
 impl Debug for Detector {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
         f.debug_struct("Detector")
@@ -159,10 +182,8 @@ mod tests {
     /// Set `NODE_URL` environment to a mainnet RPC URL.
     #[ignore]
     #[tokio::test]
-    async fn detects_storage_slots() {
-        let detector = Detector {
-            simulator: Arc::new(Web3::new(create_env_test_transport())),
-        };
+    async fn detects_storage_slots_mainnet() {
+        let detector = Detector::new(Arc::new(Web3::new(create_env_test_transport())), 60);
 
         let storage = detector
             .detect(addr!("c02aaa39b223fe8d0a0e5c4f27ead9083c756cc2"))
@@ -187,4 +208,20 @@ mod tests {
             .unwrap();
         assert_eq!(storage, Strategy::SoladyMapping);
     }
+
+    /// Tests that we can detect storage slots by probing the first
+    /// n slots or by checking hardcoded known slots.
+    /// Set `NODE_URL` environment to an arbitrum RPC URL.
+    #[ignore]
+    #[tokio::test]
+    async fn detects_storage_slots_arbitrum() {
+        let detector = Detector::new(Arc::new(Web3::new(create_env_test_transport())), 60);
+
+        // all bridged tokens on arbitrum require a ton of probing
+        let storage = detector
+            .detect(addr!("ff970a61a04b1ca14834a43f5de4533ebddb5cc8"))
+            .await
+            .unwrap();
+        assert_eq!(storage, Strategy::SolidityMapping { slot: 51.into() });
+    }
 }