Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(storage): support pruning Merkle tries #1747

Merged
merged 7 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Added

- Pathfinder now supports storing only the latest state of the Merkle tries. This can be enabled by specifying '--storage.prune-state-tries true' on the command line when creating a new database.
- When enabled, storage proofs can be generated only for the latest block.
- Pruned merkle tries take significantly less disk space than full ones.
- Pathfinder stores this setting in its database and defaults to using that.
- Once set pruning cannot be enabled/disabled for non-empty databases.

## [0.11.5] - 2024-04-02

### Changed
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 4 additions & 7 deletions crates/pathfinder/examples/feeder_gateway.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,10 @@ async fn main() -> anyhow::Result<()> {

async fn serve() -> anyhow::Result<()> {
let database_path = std::env::args().nth(1).unwrap();
let storage = pathfinder_storage::Storage::migrate(
database_path.into(),
pathfinder_storage::JournalMode::WAL,
1,
)?
.create_pool(NonZeroU32::new(10).unwrap())
.unwrap();
let storage = pathfinder_storage::StorageBuilder::file(database_path.into())
.migrate()?
.create_pool(NonZeroU32::new(10).unwrap())
.unwrap();

let chain = {
let mut connection = storage.connection()?;
Expand Down
3 changes: 2 additions & 1 deletion crates/pathfinder/examples/migrate_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ fn main() {
let size_before = std::fs::metadata(&path).expect("Path does not exist").len() as i64;

let started_at = std::time::Instant::now();
pathfinder_storage::Storage::migrate(path.clone(), pathfinder_storage::JournalMode::WAL, 1)
pathfinder_storage::StorageBuilder::file(path.clone())
.migrate()
.unwrap();
let migrated_at = std::time::Instant::now();

Expand Down
5 changes: 3 additions & 2 deletions crates/pathfinder/examples/re_execute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use pathfinder_common::receipt::Receipt;
use pathfinder_common::transaction::Transaction;
use pathfinder_common::{BlockHeader, BlockNumber, ChainId};
use pathfinder_executor::ExecutionState;
use pathfinder_storage::{BlockId, JournalMode, Storage};
use pathfinder_storage::{BlockId, Storage};
use rayon::prelude::*;

// The Cairo VM allocates felts on the stack, so during execution it's making
Expand All @@ -31,7 +31,8 @@ fn main() -> anyhow::Result<()> {
let n_cpus = rayon::current_num_threads();

let database_path = std::env::args().nth(1).unwrap();
let storage = Storage::migrate(database_path.into(), JournalMode::WAL, 1)?
let storage = pathfinder_storage::StorageBuilder::file(database_path.into())
.migrate()?
.create_pool(NonZeroU32::new(n_cpus as u32 * 2).unwrap())?;
let mut db = storage
.connection()
Expand Down
5 changes: 3 additions & 2 deletions crates/pathfinder/examples/test_state_rollback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{num::NonZeroU32, time::Instant};

use anyhow::Context;
use pathfinder_common::BlockNumber;
use pathfinder_storage::{BlockId, JournalMode, Storage};
use pathfinder_storage::{BlockId, StorageBuilder};

fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
Expand All @@ -11,7 +11,8 @@ fn main() -> anyhow::Result<()> {
.init();

let database_path = std::env::args().nth(1).unwrap();
let storage = Storage::migrate(database_path.into(), JournalMode::WAL, 1)?
let storage = StorageBuilder::file(database_path.into())
.migrate()?
.create_pool(NonZeroU32::new(10).unwrap())?;
let mut db = storage
.connection()
Expand Down
4 changes: 2 additions & 2 deletions crates/pathfinder/examples/verify_block_hashes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use anyhow::Context;
use pathfinder_common::{BlockHash, BlockNumber, Chain, ChainId, StarknetVersion};
use pathfinder_crypto::Felt;
use pathfinder_lib::state::block_hash::{verify_block_hash, VerifyResult};
use pathfinder_storage::{JournalMode, Storage};
use starknet_gateway_types::reply::{Block, GasPrices, Status};

/// Verify block hashes in a pathfinder database.
Expand All @@ -24,7 +23,8 @@ fn main() -> anyhow::Result<()> {
};

let database_path = std::env::args().nth(2).unwrap();
let storage = Storage::migrate(database_path.into(), JournalMode::WAL, 1)?
let storage = pathfinder_storage::StorageBuilder::file(database_path.into())
.migrate()?
.create_pool(NonZeroU32::new(1).unwrap())
.unwrap();
let mut db = storage
Expand Down
4 changes: 2 additions & 2 deletions crates/pathfinder/examples/verify_transaction_hashes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use std::num::NonZeroU32;

use anyhow::Context;
use pathfinder_common::{BlockNumber, ChainId};
use pathfinder_storage::{JournalMode, Storage};
use rayon::prelude::*;

/// Verify transaction hashes in a pathfinder database.
Expand All @@ -29,7 +28,8 @@ fn main() -> anyhow::Result<()> {

println!("Migrating database...");

let storage = Storage::migrate(database_path.into(), JournalMode::WAL, 1)?
let storage = pathfinder_storage::StorageBuilder::file(database_path.into())
.migrate()?
.create_pool(NonZeroU32::new(1).unwrap())
.unwrap();
let mut db = storage
Expand Down
11 changes: 11 additions & 0 deletions crates/pathfinder/src/bin/pathfinder/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,15 @@ This should only be enabled for debugging purposes as it adds substantial proces
default_value = "100000"
)]
get_events_max_uncached_bloom_filters_to_load: std::num::NonZeroUsize,

#[arg(
long = "storage.prune-state-tries",
long_help = r"When enabled, only the last state of the Merkle tries is kept in the database. \
This can be used to reduce the disk space usage at the cost of only being able to provide storage proofs for the latest block.",
env = "PATHFINDER_PRUNE_MERKLE_TRIES",
value_name = "BOOL"
)]
prune_merkle_tries: Option<bool>,
}

#[derive(clap::ValueEnum, Debug, Clone, Copy, PartialEq)]
Expand Down Expand Up @@ -552,6 +561,7 @@ pub struct Config {
pub event_bloom_filter_cache_size: NonZeroUsize,
pub get_events_max_blocks_to_scan: NonZeroUsize,
pub get_events_max_uncached_bloom_filters_to_load: NonZeroUsize,
pub prune_merkle_tries: Option<bool>,
}

pub struct Ethereum {
Expand Down Expand Up @@ -768,6 +778,7 @@ impl Config {
get_events_max_uncached_bloom_filters_to_load: cli
.get_events_max_uncached_bloom_filters_to_load,
gateway_timeout: Duration::from_secs(cli.gateway_timeout.get()),
prune_merkle_tries: cli.prune_merkle_tries,
}
}
}
Expand Down
12 changes: 6 additions & 6 deletions crates/pathfinder/src/bin/pathfinder/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,12 @@ async fn async_main() -> anyhow::Result<()> {

// Setup and verify database

let storage_manager = Storage::migrate(
pathfinder_context.database.clone(),
config.sqlite_wal,
config.event_bloom_filter_cache_size.get(),
)
.unwrap();
let storage_manager =
pathfinder_storage::StorageBuilder::file(pathfinder_context.database.clone())
.journal_mode(config.sqlite_wal)
.bloom_filter_cache_size(config.event_bloom_filter_cache_size.get())
.prune_merkle_tries(config.prune_merkle_tries)
.migrate()?;
let sync_storage = storage_manager
// 5 is enough for normal sync operations, and then `available_parallelism` for
// the rayon thread pool workers to use.
Expand Down
8 changes: 4 additions & 4 deletions crates/pathfinder/src/p2p_network/sync_handlers/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ mod boundary_conditions {
use p2p_proto::receipt::ReceiptsRequest;
use p2p_proto::state::StateDiffsRequest;
use p2p_proto::transaction::TransactionsRequest;
use pathfinder_storage::Storage;
use pathfinder_storage::StorageBuilder;
use rand::Rng;
use rstest::rstest;

Expand Down Expand Up @@ -77,7 +77,7 @@ mod boundary_conditions {
#[case(invalid_start())]
#[tokio::test]
async fn $name(#[case] iteration: Iteration) {
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
let (tx, mut rx) = mpsc::channel(0);
let _jh = tokio::spawn($uut_name(storage, $request { iteration }, tx));
assert_eq!(rx.next().await.unwrap(), Default::default());
Expand Down Expand Up @@ -501,13 +501,13 @@ mod prop {
mod fixtures {
use crate::p2p_network::sync_handlers::MAX_COUNT_IN_TESTS;
use pathfinder_storage::fake::{with_n_blocks_and_rng, Block};
use pathfinder_storage::Storage;
use pathfinder_storage::{Storage, StorageBuilder};

pub const MAX_NUM_BLOCKS: u64 = MAX_COUNT_IN_TESTS * 2;

pub fn storage_with_seed(seed: u64, num_blocks: u64) -> (Storage, Vec<Block>) {
use rand::SeedableRng;
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
// Explicitly choose RNG to make sure seeded storage is always reproducible
let mut rng = rand_chacha::ChaCha12Rng::seed_from_u64(seed);
let initializer =
Expand Down
16 changes: 8 additions & 8 deletions crates/pathfinder/src/state/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1193,7 +1193,7 @@ mod tests {
use pathfinder_common::{macro_prelude::*, BlockCommitmentSignature};
use pathfinder_crypto::Felt;
use pathfinder_rpc::SyncState;
use pathfinder_storage::Storage;
use pathfinder_storage::StorageBuilder;
use starknet_gateway_types::reply::Block;
use starknet_gateway_types::reply::{self, GasPrices};
use std::sync::Arc;
Expand Down Expand Up @@ -1279,7 +1279,7 @@ mod tests {

#[tokio::test(flavor = "multi_thread")]
async fn block_updates() {
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
let mut connection = storage.connection().unwrap();

let (event_tx, event_rx) = tokio::sync::mpsc::channel(100);
Expand Down Expand Up @@ -1324,7 +1324,7 @@ mod tests {

#[tokio::test(flavor = "multi_thread")]
async fn reorg() {
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
let mut connection = storage.connection().unwrap();

let (event_tx, event_rx) = tokio::sync::mpsc::channel(100);
Expand Down Expand Up @@ -1372,7 +1372,7 @@ mod tests {
// A bug caused reorg'd block numbers to be skipped. This
// was due to the expected block number not being updated
// when handling the reorg.
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
let mut connection = storage.connection().unwrap();

let (event_tx, event_rx) = tokio::sync::mpsc::channel(100);
Expand Down Expand Up @@ -1427,7 +1427,7 @@ mod tests {

#[tokio::test(flavor = "multi_thread")]
async fn reorg_to_genesis() {
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
let mut connection = storage.connection().unwrap();

let (event_tx, event_rx) = tokio::sync::mpsc::channel(100);
Expand Down Expand Up @@ -1462,7 +1462,7 @@ mod tests {

#[tokio::test(flavor = "multi_thread")]
async fn new_cairo_contract() {
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
let mut connection = storage.connection().unwrap();

let (event_tx, event_rx) = tokio::sync::mpsc::channel(1);
Expand Down Expand Up @@ -1500,7 +1500,7 @@ mod tests {

#[tokio::test(flavor = "multi_thread")]
async fn new_sierra_contract() {
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
let mut connection = storage.connection().unwrap();

let (event_tx, event_rx) = tokio::sync::mpsc::channel(1);
Expand Down Expand Up @@ -1540,7 +1540,7 @@ mod tests {

#[tokio::test(flavor = "multi_thread")]
async fn consumer_should_ignore_duplicate_blocks() {
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();

let (event_tx, event_rx) = tokio::sync::mpsc::channel(5);

Expand Down
6 changes: 3 additions & 3 deletions crates/pathfinder/src/state/sync/l2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ mod tests {
StorageAddress, StorageValue,
};
use pathfinder_crypto::Felt;
use pathfinder_storage::Storage;
use pathfinder_storage::StorageBuilder;
use starknet_gateway_client::MockGatewayApi;
use starknet_gateway_types::{
error::{KnownStarknetErrorCode, SequencerError, StarknetError},
Expand Down Expand Up @@ -808,7 +808,7 @@ mod tests {
tx_event: mpsc::Sender<SyncEvent>,
sequencer: MockGatewayApi,
) -> JoinHandle<anyhow::Result<()>> {
let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
let sequencer = std::sync::Arc::new(sequencer);
let context = L2SyncContext {
sequencer,
Expand Down Expand Up @@ -1189,7 +1189,7 @@ mod tests {
chain: Chain::GoerliTestnet,
chain_id: ChainId::GOERLI_TESTNET,
block_validation_mode: MODE,
storage: Storage::in_memory().unwrap(),
storage: StorageBuilder::in_memory().unwrap(),
};
let latest_track = tokio::sync::watch::channel(Default::default());

Expand Down
6 changes: 3 additions & 3 deletions crates/pathfinder/src/state/sync/pending.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ mod tests {
BlockHash, BlockNumber, BlockTimestamp, GasPrice, StarknetVersion, StateCommitment,
StateUpdate,
};
use pathfinder_storage::Storage;
use pathfinder_storage::StorageBuilder;
use starknet_gateway_client::MockGatewayApi;
use starknet_gateway_types::reply::{Block, L1DataAvailabilityMode, PendingBlock, Status};
use tokio::sync::watch;
Expand Down Expand Up @@ -179,7 +179,7 @@ mod tests {
tx,
sequencer,
std::time::Duration::ZERO,
Storage::in_memory().unwrap(),
StorageBuilder::in_memory().unwrap(),
latest,
current,
)
Expand Down Expand Up @@ -253,7 +253,7 @@ mod tests {
tx,
sequencer,
std::time::Duration::ZERO,
Storage::in_memory().unwrap(),
StorageBuilder::in_memory().unwrap(),
rx_latest,
rx_current,
)
Expand Down
Binary file modified crates/rpc/fixtures/mainnet.sqlite
Binary file not shown.
4 changes: 2 additions & 2 deletions crates/rpc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ pub mod test_utils {
use pathfinder_common::transaction::*;
use pathfinder_merkle_tree::StorageCommitmentTree;
use pathfinder_storage::TransactionData;
use pathfinder_storage::{BlockId, Storage};
use pathfinder_storage::{BlockId, Storage, StorageBuilder};
use primitive_types::H160;
use starknet_gateway_types::reply::GasPrices;
use std::collections::HashMap;
Expand All @@ -255,7 +255,7 @@ pub mod test_utils {
pub fn setup_storage() -> Storage {
use pathfinder_merkle_tree::contract_state::update_contract_state;

let storage = Storage::in_memory().unwrap();
let storage = StorageBuilder::in_memory().unwrap();
let mut connection = storage.connection().unwrap();
let db_txn = connection.transaction().unwrap();

Expand Down
4 changes: 2 additions & 2 deletions crates/rpc/src/pending.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ mod tests {
let (sender, receiver) = tokio::sync::watch::channel(Default::default());
let uut = PendingWatcher::new(receiver);

let mut storage = pathfinder_storage::Storage::in_memory()
let mut storage = pathfinder_storage::StorageBuilder::in_memory()
.unwrap()
.connection()
.unwrap();
Expand Down Expand Up @@ -169,7 +169,7 @@ mod tests {
let (_sender, receiver) = tokio::sync::watch::channel(Default::default());
let uut = PendingWatcher::new(receiver);

let mut storage = pathfinder_storage::Storage::in_memory()
let mut storage = pathfinder_storage::StorageBuilder::in_memory()
.unwrap()
.connection()
.unwrap();
Expand Down
2 changes: 1 addition & 1 deletion crates/rpc/src/test_setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub async fn test_storage<F: FnOnce(StateUpdate) -> StateUpdate>(
version: StarknetVersion,
customize_state_update: F,
) -> (Storage, BlockHeader, ContractAddress, ContractAddress) {
let storage = Storage::in_memory().unwrap();
let storage = pathfinder_storage::StorageBuilder::in_memory().unwrap();
let mut db = storage.connection().unwrap();
let tx = db.transaction().unwrap();

Expand Down
2 changes: 1 addition & 1 deletion crates/rpc/src/v03/method/get_state_update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ mod tests {

/// Add some dummy state updates to the context for testing
fn context_with_state_updates() -> (Vec<types::StateUpdate>, RpcContext) {
let storage = pathfinder_storage::Storage::in_memory().unwrap();
let storage = pathfinder_storage::StorageBuilder::in_memory().unwrap();

let state_updates = pathfinder_storage::fake::with_n_blocks(&storage, 3)
.into_iter()
Expand Down
Loading
Loading