Skip to content

Commit

Permalink
Refactor balance override detection (#3239)
Browse files Browse the repository at this point in the history
# Description
With #3238 merged it now
becomes apparent why we have such a poor quote verification ratio on
arbitrum. All bridged tokens store their `balances` mapping at storage
slot 51.

# Changes
To avoid having to merge another PR every time we find an important
token that requires a deeper probing depth, I made the probing depth
controllable via a CLI parameter.
While I was at it I did the same for the token cache size (should have
been like that from the start given that the change was trivial with the
given architecture).

Making the probing depth runtime dependent made using static variables a
lot more annoying. Instead I created an `Inner` struct which contains
all the static data so now it can be initialized once at runtime without
more exotic types like `LazyLock` or `OnceLock`. As long as we ensure
that only 1 instance of `Detector` gets created this would be equivalent
in terms of memory usage and performance.

I also refactored the logic that constructs the strategy list. It should
now be dead simple to see what's going on and how to extend it. First, we
hardcode a list of known non-variable storage slots; afterwards, we
probe the first N slots starting from known entry points. This is
currently just the 0th slot but could theoretically be extended with
other relevant slots. Even if it doesn't get extended, I think the
refactor still makes the code easier to follow.

## How to test
e2e tests and unit tests still work.
Also added another unit test for a bridged token on Arbitrum.
  • Loading branch information
MartinquaXD authored Jan 17, 2025
1 parent 395d605 commit df6306c
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,32 @@ pub struct Arguments {
/// will take precedence.
#[clap(long, env, action = clap::ArgAction::Set, default_value_t)]
pub quote_autodetect_token_balance_overrides: bool,

/// Controls how many storage slots get probed per storage entry point
/// for automatically detecting how to override the balances of a token.
#[clap(long, env, action = clap::ArgAction::Set, default_value = "60")]
pub quote_autodetect_token_balance_overrides_probing_depth: u8,

/// Controls for how many tokens we store the result of the automatic
/// balance override detection before evicting less used entries.
#[clap(long, env, action = clap::ArgAction::Set, default_value = "1000")]
pub quote_autodetect_token_balance_overrides_cache_size: usize,
}

impl Arguments {
const CACHE_SIZE: usize = 1000;

/// Creates a balance overrides instance from the current configuration.
pub fn init(&self, simulator: Arc<dyn CodeSimulating>) -> Arc<dyn BalanceOverriding> {
Arc::new(BalanceOverrides {
hardcoded: self.quote_token_balance_overrides.0.clone(),
detector: self.quote_autodetect_token_balance_overrides.then(|| {
(
Detector::new(simulator),
Mutex::new(SizedCache::with_size(Self::CACHE_SIZE)),
Detector::new(
simulator,
self.quote_autodetect_token_balance_overrides_probing_depth,
),
Mutex::new(SizedCache::with_size(
self.quote_autodetect_token_balance_overrides_cache_size,
)),
)
}),
})
Expand All @@ -61,6 +74,8 @@ impl Display for Arguments {
let Self {
quote_token_balance_overrides,
quote_autodetect_token_balance_overrides,
quote_autodetect_token_balance_overrides_probing_depth,
quote_autodetect_token_balance_overrides_cache_size,
} = self;

writeln!(
Expand All @@ -73,6 +88,16 @@ impl Display for Arguments {
"quote_autodetect_token_balance_overrides: {:?}",
quote_autodetect_token_balance_overrides
)?;
writeln!(
f,
"quote_autodetect_token_balance_overrides_probing_depth: {:?}",
quote_autodetect_token_balance_overrides_probing_depth
)?;
writeln!(
f,
"quote_autodetect_token_balance_overrides_cache_size: {:?}",
quote_autodetect_token_balance_overrides_cache_size
)?;

Ok(())
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use {
std::{
collections::HashMap,
fmt::{self, Debug, Formatter},
sync::{Arc, LazyLock},
sync::Arc,
},
thiserror::Error,
web3::{signing::keccak256, types::CallRequest},
Expand All @@ -18,14 +18,82 @@ use {
///
/// This has the exact same node requirements as trade verification.
#[derive(Clone)]
pub struct Detector {
pub struct Detector(Arc<Inner>);

/// Data a [`Detector`] keeps behind its `Arc`, so every clone of the
/// detector refers to the same instance. All fields are filled in once by
/// `Detector::new`.
pub struct Inner {
/// simulator handed to `Detector::new`
// NOTE(review): presumably used by `detect` to execute the `balanceOf`
// call with the state overrides applied - confirm against `detect`.
simulator: Arc<dyn CodeSimulating>,
/// address that we try to override the balances for
holder: Address,
/// all strategies used to detect successful balance overrides
strategies: Vec<StrategyHelper>,
/// overrides for all tested strategies, collected from each strategy's
/// `state_override` for `holder` and that strategy's probe balance
state_overrides: HashMap<H256, H256>,
}

impl std::ops::Deref for Detector {
type Target = Inner;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl Detector {
/// Creates a new balance override detector.
pub fn new(simulator: Arc<dyn CodeSimulating>) -> Self {
Self { simulator }
pub fn new(simulator: Arc<dyn CodeSimulating>, probing_depth: u8) -> Self {
let holder = {
// On a technical note, Ethereum public addresses are, for the most
// part, generated by taking the 20 last bytes of a Keccak-256 hash (for
// things like contract creation, public address derivation from a
// Secp256k1 public key, etc.), so we use one for our heuristics from a
// 32-byte digest with no know pre-image, to prevent any weird
// interactions with the weird tokens of the world.
let mut address = Address::default();
address.0.copy_from_slice(&keccak256(b"Moo!")[12..]);
address.0[19] = address.0[19].wrapping_sub(1);
address
};

let strategies: Vec<_> = {
// First test storage slots that don't need guesswork.
let mut strategies = vec![
Strategy::SoladyMapping,
Strategy::SolidityMapping {
slot: U256::from(OPEN_ZEPPELIN_ERC20_UPGRADEABLE),
},
];

// For each entry point probe the first n following slots.
let entry_points = [
// solc lays out memory linearly starting at 0 by default
"0000000000000000000000000000000000000000000000000000000000000000",
];
for start_slot in entry_points {
let mut slot = U256::from(start_slot);
for _ in 0..probing_depth {
strategies.push(Strategy::SolidityMapping { slot });
slot += U256::one();
}
}

strategies
.into_iter()
.enumerate()
.map(|(index, strategy)| StrategyHelper::new(strategy, index))
.collect()
};

let state_overrides = strategies
.iter()
.map(|helper| helper.strategy.state_override(&holder, &helper.balance))
.collect::<HashMap<_, _>>();

Self(Arc::new(Inner {
simulator,
holder,
strategies,
state_overrides,
}))
}

/// Tries to detect the balance override strategy for the specified token.
Expand All @@ -35,12 +103,12 @@ impl Detector {
let token = dummy_contract!(ERC20, token);
let call = CallRequest {
to: Some(token.address()),
data: token.methods().balance_of(*HOLDER).m.tx.data,
data: token.methods().balance_of(self.holder).m.tx.data,
..Default::default()
};
let overrides = hashmap! {
token.address() => StateOverride {
state_diff: Some(STORAGE_OVERRIDES.clone()),
state_diff: Some(self.state_overrides.clone()),
..Default::default()
},
};
Expand All @@ -50,7 +118,7 @@ impl Detector {
.then(|| U256::from_big_endian(&output))
.ok_or(DetectionError::Decode)?;

TESTED_STRATEGIES
self.strategies
.iter()
.find_map(|helper| (helper.balance == balance).then_some(helper.strategy.clone()))
.ok_or(DetectionError::NotFound)
Expand All @@ -67,7 +135,8 @@ struct StrategyHelper {
}

impl StrategyHelper {
fn new(strategy: Strategy, index: u8) -> Self {
fn new(strategy: Strategy, index: usize) -> Self {
let index = u8::try_from(index).expect("unreasonable amount of strategies used");
Self {
strategy,
// Use an exact value which isn't too large or too small. This helps
Expand All @@ -81,56 +150,10 @@ impl StrategyHelper {
}
}

/// Storage slot based on OpenZeppelin's ERC20Upgradeable contract [^1].
///
/// [^1]: <https://github.com/OpenZeppelin/openzeppelin-contracts-upgradeable/blob/master/contracts/token/ERC20/ERC20Upgradeable.sol#L43-L44>
static OPEN_ZEPPELIN_ERC20_UPGRADEABLE: &str =
// <https://github.com/OpenZeppelin/openzeppelin-contracts-upgradeable/blob/master/contracts/token/ERC20/ERC20Upgradeable.sol#L43-L44>
const OPEN_ZEPPELIN_ERC20_UPGRADEABLE: &str =
"52c63247e1f47db19d5ce0460030c497f067ca4cebf71ba98eeadabe20bace00";

/// Address which we try to override the balances for.
static HOLDER: LazyLock<Address> = LazyLock::new(|| {
// On a technical note, Ethereum public addresses are, for the most
// part, generated by taking the 20 last bytes of a Keccak-256 hash (for
// things like contract creation, public address derivation from a
// Secp256k1 public key, etc.), so we use one for our heuristics from a
// 32-byte digest with no known pre-image, to prevent any weird
// interactions with the weird tokens of the world.
let mut address = Address::default();
// Start from the last 20 bytes of keccak256("Moo!") ...
address.0.copy_from_slice(&keccak256(b"Moo!")[12..]);
// ... and decrement the final byte (wrapping), so the address is no longer
// the tail of that known hash.
address.0[19] = address.0[19].wrapping_sub(1);
address
});

/// All the strategies we use to detect where a token stores the balances.
///
/// Each strategy is paired with its position in this list via
/// `StrategyHelper::new`.
static TESTED_STRATEGIES: LazyLock<Vec<StrategyHelper>> = LazyLock::new(|| {
const FIRST_N_SLOTS: u8 = 25;

// This is a pretty unsophisticated strategy where we basically try a
// bunch of different slots and see which one sticks. We try balance
// mappings for the first `FIRST_N_SLOTS` slots; each with a unique value.
(0..FIRST_N_SLOTS).map(|i| {
let strategy = Strategy::SolidityMapping { slot: U256::from(i) };
StrategyHelper::new(strategy, i)
})
// Afterwards we try hardcoded storage slots based on popular utility
// libraries like OpenZeppelin. Their indices continue right after the
// probed slots, starting at `FIRST_N_SLOTS`.
.chain((FIRST_N_SLOTS..).zip([
Strategy::SolidityMapping{ slot: U256::from(OPEN_ZEPPELIN_ERC20_UPGRADEABLE) },
Strategy::SoladyMapping,
]).map(|(index, strategy)| {
StrategyHelper::new(strategy, index)
}))
.collect()
});

/// Storage overrides (storage_slot, value) for all tested strategies.
///
/// Collected from `Strategy::state_override`, called for every entry of
/// `TESTED_STRATEGIES` with `HOLDER` and that entry's `balance`.
static STORAGE_OVERRIDES: LazyLock<HashMap<H256, H256>> = LazyLock::new(|| {
TESTED_STRATEGIES
.iter()
.map(|helper| helper.strategy.state_override(&HOLDER, &helper.balance))
.collect::<HashMap<_, _>>()
});

impl Debug for Detector {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
f.debug_struct("Detector")
Expand Down Expand Up @@ -159,10 +182,8 @@ mod tests {
/// Set `NODE_URL` environment to a mainnet RPC URL.
#[ignore]
#[tokio::test]
async fn detects_storage_slots() {
let detector = Detector {
simulator: Arc::new(Web3::new(create_env_test_transport())),
};
async fn detects_storage_slots_mainnet() {
let detector = Detector::new(Arc::new(Web3::new(create_env_test_transport())), 60);

let storage = detector
.detect(addr!("c02aaa39b223fe8d0a0e5c4f27ead9083c756cc2"))
Expand All @@ -187,4 +208,20 @@ mod tests {
.unwrap();
assert_eq!(storage, Strategy::SoladyMapping);
}

/// Checks detection for a bridged token on arbitrum, whose balances are
/// expected to be found in the Solidity mapping at storage slot 51.
/// Set `NODE_URL` environment to an arbitrum RPC URL.
#[ignore]
#[tokio::test]
async fn detects_storage_slots_arbitrum() {
    let simulator = Arc::new(Web3::new(create_env_test_transport()));
    let detector = Detector::new(simulator, 60);

    // bridged tokens on arbitrum are only found with a deep probing depth
    let bridged_token = addr!("ff970a61a04b1ca14834a43f5de4533ebddb5cc8");
    let detected = detector.detect(bridged_token).await.unwrap();

    let expected = Strategy::SolidityMapping { slot: 51.into() };
    assert_eq!(detected, expected);
}
}

0 comments on commit df6306c

Please sign in to comment.