From 34fc18134a5a658d61ccfdf14a70cef33f878092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CF=80a?= <76558220+rkdud007@users.noreply.github.com> Date: Mon, 18 Nov 2024 17:41:19 +0900 Subject: [PATCH] feat: starknet input working --- hdp/src/preprocessor/compile/module.rs | 4 - hdp/src/preprocessor/mod.rs | 211 +++++++++++------- hdp/src/preprocessor/module_compile.rs | 95 ++++++++ .../processed_types/block_proofs.rs | 18 +- .../cairo_format/block_proofs.rs | 86 ++++--- hdp/src/primitives/processed_types/query.rs | 3 +- 6 files changed, 292 insertions(+), 125 deletions(-) create mode 100644 hdp/src/preprocessor/module_compile.rs diff --git a/hdp/src/preprocessor/compile/module.rs b/hdp/src/preprocessor/compile/module.rs index 9244dfc0..1540575e 100644 --- a/hdp/src/preprocessor/compile/module.rs +++ b/hdp/src/preprocessor/compile/module.rs @@ -58,10 +58,6 @@ impl Compilable for ModuleVec { // 3. Categorize fetch keys by chain ID let categorized_keys = categorize_fetch_keys(dry_run_module.fetch_keys); - if categorized_keys.len() > 1 { - // TODO: This is a temporary solution. Need to handle multiple chain IDs in the future - panic!("Multiple chain IDs are not supported yet"); - } // Initialize maps to store fetched proofs grouped by chain ID let mut accounts_map = HashMap::new(); diff --git a/hdp/src/preprocessor/mod.rs b/hdp/src/preprocessor/mod.rs index bcfdb901..708210d8 100644 --- a/hdp/src/preprocessor/mod.rs +++ b/hdp/src/preprocessor/mod.rs @@ -2,6 +2,7 @@ //! This will be most abstract layer of the preprocessor. use crate::constant::SOUND_CAIRO_RUN_OUTPUT_FILE; +use crate::preprocessor::module_compile::module_compile; use crate::primitives::merkle_tree::{build_result_merkle_tree, build_task_merkle_tree}; use crate::primitives::processed_types::datalake_compute::ProcessedDatalakeCompute; use crate::primitives::processed_types::module::ProcessedModule; @@ -20,6 +21,8 @@ use tracing::{debug, info}; pub mod compile; pub mod module_registry; +pub mod module_compile; + #[derive(Error, Debug)] pub enum PreProcessorError { #[error("Failed to compile the tasks")] @@ -47,91 +50,141 @@ impl PreProcessor { &self, tasks: Vec, ) -> Result { - // 1. 
compile the given tasks
-        let compiled_results = tasks
-            .compile(&self.compile_config)
-            .await
-            .map_err(PreProcessorError::CompileError)?;
+        // TODO: temporary solution; if the first task is a module, send it straight to module_compile
+        match tasks.clone().into_iter().next().unwrap() {
+            TaskEnvelope::Module(module_task) => {
+                let compiled_results =
+                    module_compile(module_task.clone(), &self.compile_config).await?;
+                let module = TaskEnvelope::Module(module_task.clone());
+                let task_commitment = module.commit();
+                let tasks_commitments: Vec = vec![task_commitment];
+                let tasks_merkle_tree = build_task_merkle_tree(&tasks_commitments);
+                let results_merkle_tree_result =
+                    build_result_merkle_tree(&tasks_commitments, &compiled_results.task_results);
+                let (result_merkle_tree, results_commitments) = results_merkle_tree_result;
+                let task_merkle_root = tasks_merkle_tree.root();
+                let encoded_task = module_task.task.encode_task();
+                let result_commitment = results_commitments[0];
+                let compiled_result = compiled_results.task_results[0];
+                debug!("compiled_result: {:#?}", compiled_result);
+                let result_proof = result_merkle_tree
+                    .get_proof(&DynSolValue::FixedBytes(result_commitment, 32))
+                    .unwrap();
+                let task_proof = tasks_merkle_tree
+                    .get_proof(&DynSolValue::FixedBytes(task_commitment, 32))
+                    .unwrap();
+                let processed_module = ProcessedModule::new(
+                    Bytes::from(encoded_task),
+                    task_commitment,
+                    result_commitment,
+                    compiled_result,
+                    task_proof,
+                    result_proof,
+                    module_task.task.inputs,
+                    module_task.module_class,
+                );
+
+                let task = ProcessedTask::Module(processed_module);
 
-        let tasks_commitments: Vec =
-            tasks.iter().map(|task| task.commit()).collect::>();
-        let tasks_merkle_tree = build_task_merkle_tree(&tasks_commitments);
-        let results_merkle_tree_result =
-            build_result_merkle_tree(&tasks_commitments, &compiled_results.task_results);
-        let (result_merkle_tree, results_commitments) = results_merkle_tree_result;
-        let task_merkle_root = tasks_merkle_tree.root();
-        let mut combined_tasks = Vec::new();
+
+                let processed_result = ProcessorInput::new(
+                    SOUND_CAIRO_RUN_OUTPUT_FILE.into(),
+                    result_merkle_tree.root(),
+                    task_merkle_root,
+                    compiled_results.proofs.into_values().collect(),
+                    vec![task],
+                );
+                info!("1️⃣ Preprocessor completed successfully");
+                Ok(processed_result)
+            }
+            TaskEnvelope::DatalakeCompute(_) => {
+                // 1.
compile the given tasks + let compiled_results = tasks + .compile(&self.compile_config) + .await + .map_err(PreProcessorError::CompileError)?; - for (i, task) in tasks.into_iter().enumerate() { - match task { - TaskEnvelope::DatalakeCompute(datalake_compute) => { - let task_commitment = datalake_compute.commit(); - let result_commitment = results_commitments[i]; - let compiled_result = compiled_results.task_results[i]; - let result_proof = result_merkle_tree - .get_proof(&DynSolValue::FixedBytes(result_commitment, 32)) - .unwrap(); - let task_proof = tasks_merkle_tree - .get_proof(&DynSolValue::FixedBytes(task_commitment, 32)) - .unwrap(); - let encoded_task = datalake_compute.encode()?; - let datalake_type = datalake_compute.datalake.get_datalake_type(); - let property_type = datalake_compute.datalake.get_collection_type().to_index(); - debug!("compiled_result: {:#?}", compiled_result); - let datalake_compute = ProcessedDatalakeCompute::new( - Bytes::from(encoded_task), - task_commitment, - compiled_result, - result_commitment, - task_proof, - result_proof, - Bytes::from(datalake_compute.datalake.encode()?), - datalake_type.into(), - property_type, - ); + let tasks_commitments: Vec = + tasks.iter().map(|task| task.commit()).collect::>(); + let tasks_merkle_tree = build_task_merkle_tree(&tasks_commitments); + let results_merkle_tree_result = + build_result_merkle_tree(&tasks_commitments, &compiled_results.task_results); + let (result_merkle_tree, results_commitments) = results_merkle_tree_result; + let task_merkle_root = tasks_merkle_tree.root(); + let mut combined_tasks = Vec::new(); - let task = ProcessedTask::DatalakeCompute(datalake_compute); - combined_tasks.push(task); - } - TaskEnvelope::Module(module) => { - let task_commitment = module.task.commit(); - let encoded_task = module.task.encode_task(); - let result_commitment = results_commitments[i]; - let compiled_result = compiled_results.task_results[i]; - debug!("compiled_result: {:#?}", compiled_result); - let result_proof = result_merkle_tree - .get_proof(&DynSolValue::FixedBytes(result_commitment, 32)) - .unwrap(); - let task_proof = tasks_merkle_tree - .get_proof(&DynSolValue::FixedBytes(task_commitment, 32)) - .unwrap(); - let processed_module = ProcessedModule::new( - Bytes::from(encoded_task), - task_commitment, - result_commitment, - compiled_result, - task_proof, - result_proof, - module.task.inputs, - module.module_class, - ); + for (i, task) in tasks.into_iter().enumerate() { + match task { + TaskEnvelope::DatalakeCompute(datalake_compute) => { + let task_commitment = datalake_compute.commit(); + let result_commitment = results_commitments[i]; + let compiled_result = compiled_results.task_results[i]; + let result_proof = result_merkle_tree + .get_proof(&DynSolValue::FixedBytes(result_commitment, 32)) + .unwrap(); + let task_proof = tasks_merkle_tree + .get_proof(&DynSolValue::FixedBytes(task_commitment, 32)) + .unwrap(); + let encoded_task = datalake_compute.encode()?; + let datalake_type = datalake_compute.datalake.get_datalake_type(); + let property_type = + datalake_compute.datalake.get_collection_type().to_index(); + debug!("compiled_result: {:#?}", compiled_result); + let datalake_compute = ProcessedDatalakeCompute::new( + Bytes::from(encoded_task), + task_commitment, + compiled_result, + result_commitment, + task_proof, + result_proof, + Bytes::from(datalake_compute.datalake.encode()?), + datalake_type.into(), + property_type, + ); - let task = ProcessedTask::Module(processed_module); - combined_tasks.push(task); 
+                        let task = ProcessedTask::DatalakeCompute(datalake_compute);
+                        combined_tasks.push(task);
+                    }
+                    TaskEnvelope::Module(module) => {
+                        let task_commitment = module.task.commit();
+                        let encoded_task = module.task.encode_task();
+                        let result_commitment = results_commitments[i];
+                        let compiled_result = compiled_results.task_results[i];
+                        debug!("compiled_result: {:#?}", compiled_result);
+                        let result_proof = result_merkle_tree
+                            .get_proof(&DynSolValue::FixedBytes(result_commitment, 32))
+                            .unwrap();
+                        let task_proof = tasks_merkle_tree
+                            .get_proof(&DynSolValue::FixedBytes(task_commitment, 32))
+                            .unwrap();
+                        let processed_module = ProcessedModule::new(
+                            Bytes::from(encoded_task),
+                            task_commitment,
+                            result_commitment,
+                            compiled_result,
+                            task_proof,
+                            result_proof,
+                            module.task.inputs,
+                            module.module_class,
+                        );
+
+                        let task = ProcessedTask::Module(processed_module);
+                        combined_tasks.push(task);
+                    }
+                }
+            }
+
+                // TODO: this chain ID handling still needs to be fixed
+                let proofs = compiled_results.to_processed_block_vec();
+                let processed_result = ProcessorInput::new(
+                    SOUND_CAIRO_RUN_OUTPUT_FILE.into(),
+                    result_merkle_tree.root(),
+                    task_merkle_root,
+                    proofs,
+                    combined_tasks,
+                );
+                info!("1️⃣ Preprocessor completed successfully");
+                Ok(processed_result)
+            }
+        }
-
-        // TODO: this chain id need to be fix
-        let proofs = compiled_results.to_processed_block_vec();
-        let processed_result = ProcessorInput::new(
-            SOUND_CAIRO_RUN_OUTPUT_FILE.into(),
-            result_merkle_tree.root(),
-            task_merkle_root,
-            proofs,
-            combined_tasks,
-        );
-        info!("1️⃣ Preprocessor completed successfully");
-        Ok(processed_result)
     }
 }
diff --git a/hdp/src/preprocessor/module_compile.rs b/hdp/src/preprocessor/module_compile.rs
new file mode 100644
index 00000000..a5f6c0d0
--- /dev/null
+++ b/hdp/src/preprocessor/module_compile.rs
@@ -0,0 +1,95 @@
+//! This file sketches out compilation logic that only considers module tasks.
+//! We have already agreed on the direction of deprecating the datalake path; the current ./compile file
+//! causes too much overhead in terms of abstraction around the legacy types that we have supported.
+//! Ideally, this file will later deprecate ./compile.
+
+use alloy::primitives::U256;
+use std::collections::HashMap;
+use std::path::PathBuf;
+use tracing::info;
+
+use crate::{
+    cairo_runner::{cairo_dry_run, dry_run::DryRunResult, input::dry_run::DryRunnerProgramInput},
+    constant::DRY_CAIRO_RUN_OUTPUT_FILE,
+    primitives::{
+        processed_types::{block_proofs::ProcessedBlockProofs, cairo_format},
+        task::ExtendedModule,
+        ChainId,
+    },
+    provider::{key::categorize_fetch_keys, traits::new_provider_from_config},
+};
+
+use super::compile::{config::CompilerConfig, CompileError};
+
+#[derive(Debug, Default, PartialEq)]
+pub struct ModuleCompilationResult {
+    /// results of tasks
+    pub task_results: Vec<U256>,
+    /// proofs
+    pub proofs: HashMap<ChainId, ProcessedBlockProofs>,
+}
+
+pub async fn module_compile(
+    task: ExtendedModule,
+    compile_config: &CompilerConfig,
+) -> Result<ModuleCompilationResult, CompileError> {
+    // Log the target task for debugging purposes
+    info!("target task: {:#?}", task);
+    let dry_run_program_path = compile_config.dry_run_program_path.clone();
+
+    // Generate input for the dry run based on the extended modules
+    let dry_run_input = DryRunnerProgramInput::new(
+        PathBuf::from(DRY_CAIRO_RUN_OUTPUT_FILE),
+        vec![cairo_format::DryRunProcessedModule::new(
+            task.task.inputs,
+            task.module_class,
+        )],
+    );
+    let input_string =
+        serde_json::to_string_pretty(&dry_run_input).expect("Failed to serialize module class");
+
+    // 2. Run the dry run and retrieve the fetch points
+    info!("2. Running dry-run... ");
+    let dry_run_results: DryRunResult = cairo_dry_run(
+        dry_run_program_path,
+        input_string,
+        compile_config.save_fetch_keys_file.clone(),
+    )?;
+
+    // TODO: this check can probably be dropped once the datalake path is deprecated
+    // Check that the program hash matches the expected hash
+    if dry_run_results[0].program_hash != task.task.program_hash {
+        return Err(CompileError::ClassHashMismatch);
+    }
+    // Ensure only one module is supported
+    if dry_run_results.len() != 1 {
+        panic!("Multiple Modules are not supported");
+    }
+
+    // Extract the dry run module result
+    let dry_run_module = dry_run_results.into_iter().next().unwrap();
+    let task_results = vec![dry_run_module.result.into()];
+
+    // 3. Categorize fetch keys by chain ID
+    let categorized_keys = categorize_fetch_keys(dry_run_module.fetch_keys);
+
+    let mut proofs_map: std::collections::HashMap<ChainId, ProcessedBlockProofs> = HashMap::new();
+    info!("3. Fetching proofs from provider...");
+    // Loop through each chain ID and fetch proofs
+    for (chain_id, keys) in categorized_keys {
+        info!("target provider chain id: {}", chain_id);
+        let target_provider_config = compile_config
+            .provider_config
+            .get(&chain_id)
+            .expect("target task's chain has not been configured");
+        let provider = new_provider_from_config(target_provider_config);
+        let results = provider.fetch_proofs_from_keys(keys).await?;
+        // Update the maps with fetched results
+        proofs_map.insert(chain_id, results);
+    }
+
+    Ok(ModuleCompilationResult {
+        task_results,
+        proofs: proofs_map,
+    })
+}
diff --git a/hdp/src/primitives/processed_types/block_proofs.rs b/hdp/src/primitives/processed_types/block_proofs.rs
index 19f708bf..494b1adb 100644
--- a/hdp/src/primitives/processed_types/block_proofs.rs
+++ b/hdp/src/primitives/processed_types/block_proofs.rs
@@ -35,6 +35,21 @@ impl ProcessedBlockProofs {
             ProcessedBlockProofs::StarkNet(starknet) => Some(starknet),
         }
     }
+
+    pub fn get_mmr_meta(self) -> Vec<MMRMeta> {
+        match self {
+            ProcessedBlockProofs::Evm(evm_proofs) => evm_proofs
+                .mmr_with_headers
+                .into_iter()
+                .map(|m| m.mmr_meta)
+                .collect(),
+            ProcessedBlockProofs::StarkNet(sn_proofs) => sn_proofs
+                .mmr_with_headers
+                .into_iter()
+                .map(|m| m.mmr_meta)
+                .collect(),
+        }
+    }
 }
 
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq, Hash)]
@@ -42,9 +57,6 @@ pub struct StarkNetBlockProofs {
     pub chain_id: String,
     pub mmr_with_headers: Vec,
     pub storages: Vec,
-    // Since accounts, transactions, and transaction_receipts do not exist for StarkNet,
-    // we omit them or include any StarkNet-specific fields if necessary.
-    // Add any StarkNet-specific fields here.
} #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq, Hash)] diff --git a/hdp/src/primitives/processed_types/cairo_format/block_proofs.rs b/hdp/src/primitives/processed_types/cairo_format/block_proofs.rs index 04dd27fd..9d97f4c4 100644 --- a/hdp/src/primitives/processed_types/cairo_format/block_proofs.rs +++ b/hdp/src/primitives/processed_types/cairo_format/block_proofs.rs @@ -1,7 +1,8 @@ use serde::{Deserialize, Serialize}; use crate::primitives::processed_types::{ - block_proofs::ProcessedBlockProofs as BaseProcessedBlockProofs, mmr::MMRMeta, + block_proofs::{ProcessedBlockProofs as BaseProcessedBlockProofs, StarkNetBlockProofs}, + mmr::MMRMeta, }; use super::{ @@ -13,47 +14,58 @@ impl AsCairoFormat for BaseProcessedBlockProofs { type Output = ProcessedBlockProofs; fn as_cairo_format(&self) -> Self::Output { - let evm_proof = self.clone().get_evm_proofs().unwrap(); - ProcessedBlockProofs { - chain_id: evm_proof.chain_id, - mmr_with_headers: evm_proof - .mmr_with_headers - .iter() - .map(|mmr_with_header| MMRWithHeader { - mmr_meta: mmr_with_header.mmr_meta.clone(), - headers: mmr_with_header - .headers - .iter() - .map(|header| header.as_cairo_format()) - .collect(), - }) - .collect(), - accounts: evm_proof - .accounts - .iter() - .map(|account| account.as_cairo_format()) - .collect(), - storages: evm_proof - .storages - .iter() - .map(|storage| storage.as_cairo_format()) - .collect(), - transactions: evm_proof - .transactions - .iter() - .map(|transaction| transaction.as_cairo_format()) - .collect(), - transaction_receipts: evm_proof - .transaction_receipts - .iter() - .map(|receipt| receipt.as_cairo_format()) - .collect(), + match self { + BaseProcessedBlockProofs::Evm(evm_proof) => ProcessedBlockProofs::Evm(EvmBlockProofs { + chain_id: evm_proof.chain_id.clone(), + mmr_with_headers: evm_proof + .mmr_with_headers + .iter() + .map(|mmr_with_header| MMRWithHeader { + mmr_meta: mmr_with_header.mmr_meta.clone(), + headers: mmr_with_header + .headers + .iter() + .map(|header| header.as_cairo_format()) + .collect(), + }) + .collect(), + accounts: evm_proof + .accounts + .iter() + .map(|account| account.as_cairo_format()) + .collect(), + storages: evm_proof + .storages + .iter() + .map(|storage| storage.as_cairo_format()) + .collect(), + transactions: evm_proof + .transactions + .iter() + .map(|transaction| transaction.as_cairo_format()) + .collect(), + transaction_receipts: evm_proof + .transaction_receipts + .iter() + .map(|receipt| receipt.as_cairo_format()) + .collect(), + }), + BaseProcessedBlockProofs::StarkNet(sn_proofs) => { + ProcessedBlockProofs::StarkNet(sn_proofs.clone()) + } } } } #[derive(Serialize, Deserialize)] -pub struct ProcessedBlockProofs { +#[serde(untagged)] +pub enum ProcessedBlockProofs { + Evm(EvmBlockProofs), + StarkNet(StarkNetBlockProofs), +} + +#[derive(Serialize, Deserialize)] +pub struct EvmBlockProofs { pub chain_id: String, pub mmr_with_headers: Vec, pub accounts: Vec, diff --git a/hdp/src/primitives/processed_types/query.rs b/hdp/src/primitives/processed_types/query.rs index f48eec20..db6f5fe0 100644 --- a/hdp/src/primitives/processed_types/query.rs +++ b/hdp/src/primitives/processed_types/query.rs @@ -62,8 +62,7 @@ impl ProcessorInput { let mmr_metas: Vec = self .proofs .iter() - .flat_map(|x| x.clone().get_evm_proofs().unwrap().mmr_with_headers) - .map(|mmr_with_header| mmr_with_header.mmr_meta.clone()) + .flat_map(|x| x.clone().get_mmr_meta()) .collect(); ProcessorOutput::new(
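
One detail from the cairo_format change above that is easy to miss: the new ProcessedBlockProofs enum is annotated with #[serde(untagged)], so the EVM and StarkNet variants serialize as the bare inner object rather than as a tagged wrapper, and the Cairo-side input keeps the same flat shape it had before the enum existed. The following standalone sketch uses toy stand-in types, not hdp's real structs, and only assumes serde (with the derive feature) and serde_json; it just shows what untagged serialization looks like on the wire.

use serde::{Deserialize, Serialize};

// Toy stand-ins for the EVM / StarkNet proof payloads (hypothetical fields).
#[derive(Serialize, Deserialize, Debug)]
struct EvmProofs {
    chain_id: String,
    accounts: Vec<String>,
}

#[derive(Serialize, Deserialize, Debug)]
struct StarkNetProofs {
    chain_id: String,
    storages: Vec<String>,
}

// Mirrors the shape of the new enum: untagged means no variant name in the JSON.
#[derive(Serialize, Deserialize, Debug)]
#[serde(untagged)]
enum BlockProofs {
    Evm(EvmProofs),
    StarkNet(StarkNetProofs),
}

fn main() {
    let sn = BlockProofs::StarkNet(StarkNetProofs {
        chain_id: "SN_SEPOLIA".to_string(),
        storages: vec!["0xabc".to_string()],
    });
    // Prints {"chain_id":"SN_SEPOLIA","storages":["0xabc"]} with no enum tag,
    // so a consumer that expects a flat proofs object keeps working.
    println!("{}", serde_json::to_string(&sn).unwrap());

    // On deserialization, serde tries the variants in declaration order and picks
    // the first one that matches, so the two payload shapes must stay distinguishable.
    let parsed: BlockProofs =
        serde_json::from_str(r#"{"chain_id":"1","accounts":["0xdead"]}"#).unwrap();
    println!("{:?}", parsed);
}

The practical consequence of this design choice is that adding the StarkNet variant did not change the JSON emitted for existing EVM-only inputs; the trade-off is that variant resolution now depends on the field sets of the two structs staying distinct.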