From 34fc18134a5a658d61ccfdf14a70cef33f878092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CF=80a?= <76558220+rkdud007@users.noreply.github.com> Date: Mon, 18 Nov 2024 17:41:19 +0900 Subject: [PATCH] feat: starknet input working --- hdp/src/preprocessor/compile/module.rs | 4 - hdp/src/preprocessor/mod.rs | 211 +++++++++++------- hdp/src/preprocessor/module_compile.rs | 95 ++++++++ .../processed_types/block_proofs.rs | 18 +- .../cairo_format/block_proofs.rs | 86 ++++--- hdp/src/primitives/processed_types/query.rs | 3 +- 6 files changed, 292 insertions(+), 125 deletions(-) create mode 100644 hdp/src/preprocessor/module_compile.rs diff --git a/hdp/src/preprocessor/compile/module.rs b/hdp/src/preprocessor/compile/module.rs index 9244dfc0..1540575e 100644 --- a/hdp/src/preprocessor/compile/module.rs +++ b/hdp/src/preprocessor/compile/module.rs @@ -58,10 +58,6 @@ impl Compilable for ModuleVec { // 3. Categorize fetch keys by chain ID let categorized_keys = categorize_fetch_keys(dry_run_module.fetch_keys); - if categorized_keys.len() > 1 { - // TODO: This is a temporary solution. Need to handle multiple chain IDs in the future - panic!("Multiple chain IDs are not supported yet"); - } // Initialize maps to store fetched proofs grouped by chain ID let mut accounts_map = HashMap::new(); diff --git a/hdp/src/preprocessor/mod.rs b/hdp/src/preprocessor/mod.rs index bcfdb901..708210d8 100644 --- a/hdp/src/preprocessor/mod.rs +++ b/hdp/src/preprocessor/mod.rs @@ -2,6 +2,7 @@ //! This will be most abstract layer of the preprocessor. use crate::constant::SOUND_CAIRO_RUN_OUTPUT_FILE; +use crate::preprocessor::module_compile::module_compile; use crate::primitives::merkle_tree::{build_result_merkle_tree, build_task_merkle_tree}; use crate::primitives::processed_types::datalake_compute::ProcessedDatalakeCompute; use crate::primitives::processed_types::module::ProcessedModule; @@ -20,6 +21,8 @@ use tracing::{debug, info}; pub mod compile; pub mod module_registry; +pub mod module_compile; + #[derive(Error, Debug)] pub enum PreProcessorError { #[error("Failed to compile the tasks")] @@ -47,91 +50,141 @@ impl PreProcessor { &self, tasks: Vec, ) -> Result { - // 1. 
compile the given tasks
-        let compiled_results = tasks
-            .compile(&self.compile_config)
-            .await
-            .map_err(PreProcessorError::CompileError)?;
+        // TODO: temporary solution; if the first task is a module, send it straight to module_compile
+        match tasks.clone().into_iter().next().unwrap() {
+            TaskEnvelope::Module(module_task) => {
+                let compiled_results =
+                    module_compile(module_task.clone(), &self.compile_config).await?;
+                let module = TaskEnvelope::Module(module_task.clone());
+                let task_commitment = module.commit();
+                let tasks_commitments: Vec = vec![task_commitment];
+                let tasks_merkle_tree = build_task_merkle_tree(&tasks_commitments);
+                let results_merkle_tree_result =
+                    build_result_merkle_tree(&tasks_commitments, &compiled_results.task_results);
+                let (result_merkle_tree, results_commitments) = results_merkle_tree_result;
+                let task_merkle_root = tasks_merkle_tree.root();
+                let encoded_task = module_task.task.encode_task();
+                let result_commitment = results_commitments[0];
+                let compiled_result = compiled_results.task_results[0];
+                debug!("compiled_result: {:#?}", compiled_result);
+                let result_proof = result_merkle_tree
+                    .get_proof(&DynSolValue::FixedBytes(result_commitment, 32))
+                    .unwrap();
+                let task_proof = tasks_merkle_tree
+                    .get_proof(&DynSolValue::FixedBytes(task_commitment, 32))
+                    .unwrap();
+                let processed_module = ProcessedModule::new(
+                    Bytes::from(encoded_task),
+                    task_commitment,
+                    result_commitment,
+                    compiled_result,
+                    task_proof,
+                    result_proof,
+                    module_task.task.inputs,
+                    module_task.module_class,
+                );
+
+                let task = ProcessedTask::Module(processed_module);
 
-        let tasks_commitments: Vec =
-            tasks.iter().map(|task| task.commit()).collect::>();
-        let tasks_merkle_tree = build_task_merkle_tree(&tasks_commitments);
-        let results_merkle_tree_result =
-            build_result_merkle_tree(&tasks_commitments, &compiled_results.task_results);
-        let (result_merkle_tree, results_commitments) = results_merkle_tree_result;
-        let task_merkle_root = tasks_merkle_tree.root();
-        let mut combined_tasks = Vec::new();
+
+                let processed_result = ProcessorInput::new(
+                    SOUND_CAIRO_RUN_OUTPUT_FILE.into(),
+                    result_merkle_tree.root(),
+                    task_merkle_root,
+                    compiled_results.proofs.into_values().collect(),
+                    vec![task],
+                );
+                info!("1️⃣ Preprocessor completed successfully");
+                Ok(processed_result)
+            }
+            TaskEnvelope::DatalakeCompute(_) => {
+                // 1.
compile the given tasks + let compiled_results = tasks + .compile(&self.compile_config) + .await + .map_err(PreProcessorError::CompileError)?; - for (i, task) in tasks.into_iter().enumerate() { - match task { - TaskEnvelope::DatalakeCompute(datalake_compute) => { - let task_commitment = datalake_compute.commit(); - let result_commitment = results_commitments[i]; - let compiled_result = compiled_results.task_results[i]; - let result_proof = result_merkle_tree - .get_proof(&DynSolValue::FixedBytes(result_commitment, 32)) - .unwrap(); - let task_proof = tasks_merkle_tree - .get_proof(&DynSolValue::FixedBytes(task_commitment, 32)) - .unwrap(); - let encoded_task = datalake_compute.encode()?; - let datalake_type = datalake_compute.datalake.get_datalake_type(); - let property_type = datalake_compute.datalake.get_collection_type().to_index(); - debug!("compiled_result: {:#?}", compiled_result); - let datalake_compute = ProcessedDatalakeCompute::new( - Bytes::from(encoded_task), - task_commitment, - compiled_result, - result_commitment, - task_proof, - result_proof, - Bytes::from(datalake_compute.datalake.encode()?), - datalake_type.into(), - property_type, - ); + let tasks_commitments: Vec = + tasks.iter().map(|task| task.commit()).collect::>(); + let tasks_merkle_tree = build_task_merkle_tree(&tasks_commitments); + let results_merkle_tree_result = + build_result_merkle_tree(&tasks_commitments, &compiled_results.task_results); + let (result_merkle_tree, results_commitments) = results_merkle_tree_result; + let task_merkle_root = tasks_merkle_tree.root(); + let mut combined_tasks = Vec::new(); - let task = ProcessedTask::DatalakeCompute(datalake_compute); - combined_tasks.push(task); - } - TaskEnvelope::Module(module) => { - let task_commitment = module.task.commit(); - let encoded_task = module.task.encode_task(); - let result_commitment = results_commitments[i]; - let compiled_result = compiled_results.task_results[i]; - debug!("compiled_result: {:#?}", compiled_result); - let result_proof = result_merkle_tree - .get_proof(&DynSolValue::FixedBytes(result_commitment, 32)) - .unwrap(); - let task_proof = tasks_merkle_tree - .get_proof(&DynSolValue::FixedBytes(task_commitment, 32)) - .unwrap(); - let processed_module = ProcessedModule::new( - Bytes::from(encoded_task), - task_commitment, - result_commitment, - compiled_result, - task_proof, - result_proof, - module.task.inputs, - module.module_class, - ); + for (i, task) in tasks.into_iter().enumerate() { + match task { + TaskEnvelope::DatalakeCompute(datalake_compute) => { + let task_commitment = datalake_compute.commit(); + let result_commitment = results_commitments[i]; + let compiled_result = compiled_results.task_results[i]; + let result_proof = result_merkle_tree + .get_proof(&DynSolValue::FixedBytes(result_commitment, 32)) + .unwrap(); + let task_proof = tasks_merkle_tree + .get_proof(&DynSolValue::FixedBytes(task_commitment, 32)) + .unwrap(); + let encoded_task = datalake_compute.encode()?; + let datalake_type = datalake_compute.datalake.get_datalake_type(); + let property_type = + datalake_compute.datalake.get_collection_type().to_index(); + debug!("compiled_result: {:#?}", compiled_result); + let datalake_compute = ProcessedDatalakeCompute::new( + Bytes::from(encoded_task), + task_commitment, + compiled_result, + result_commitment, + task_proof, + result_proof, + Bytes::from(datalake_compute.datalake.encode()?), + datalake_type.into(), + property_type, + ); - let task = ProcessedTask::Module(processed_module); - combined_tasks.push(task); 
+                        let task = ProcessedTask::DatalakeCompute(datalake_compute);
+                        combined_tasks.push(task);
+                    }
+                    TaskEnvelope::Module(module) => {
+                        let task_commitment = module.task.commit();
+                        let encoded_task = module.task.encode_task();
+                        let result_commitment = results_commitments[i];
+                        let compiled_result = compiled_results.task_results[i];
+                        debug!("compiled_result: {:#?}", compiled_result);
+                        let result_proof = result_merkle_tree
+                            .get_proof(&DynSolValue::FixedBytes(result_commitment, 32))
+                            .unwrap();
+                        let task_proof = tasks_merkle_tree
+                            .get_proof(&DynSolValue::FixedBytes(task_commitment, 32))
+                            .unwrap();
+                        let processed_module = ProcessedModule::new(
+                            Bytes::from(encoded_task),
+                            task_commitment,
+                            result_commitment,
+                            compiled_result,
+                            task_proof,
+                            result_proof,
+                            module.task.inputs,
+                            module.module_class,
+                        );
+
+                        let task = ProcessedTask::Module(processed_module);
+                        combined_tasks.push(task);
+                    }
+                }
+            }
+
+                // TODO: this chain ID handling still needs to be fixed
+                let proofs = compiled_results.to_processed_block_vec();
+                let processed_result = ProcessorInput::new(
+                    SOUND_CAIRO_RUN_OUTPUT_FILE.into(),
+                    result_merkle_tree.root(),
+                    task_merkle_root,
+                    proofs,
+                    combined_tasks,
+                );
+                info!("1️⃣ Preprocessor completed successfully");
+                Ok(processed_result)
+            }
+        }
-
-        // TODO: this chain id need to be fix
-        let proofs = compiled_results.to_processed_block_vec();
-        let processed_result = ProcessorInput::new(
-            SOUND_CAIRO_RUN_OUTPUT_FILE.into(),
-            result_merkle_tree.root(),
-            task_merkle_root,
-            proofs,
-            combined_tasks,
-        );
-        info!("1️⃣ Preprocessor completed successfully");
-        Ok(processed_result)
     }
 }
diff --git a/hdp/src/preprocessor/module_compile.rs b/hdp/src/preprocessor/module_compile.rs
new file mode 100644
index 00000000..a5f6c0d0
--- /dev/null
+++ b/hdp/src/preprocessor/module_compile.rs
@@ -0,0 +1,95 @@
+//! This file sketches out compilation logic that only considers module tasks.
+//! We have already agreed on the direction of deprecating the datalake path; the current ./compile file
+//! causes too much overhead in terms of abstraction around the legacy types that we have supported.
+//! Ideally, this file will later deprecate ./compile.
+
+use alloy::primitives::U256;
+use std::collections::HashMap;
+use std::path::PathBuf;
+use tracing::info;
+
+use crate::{
+    cairo_runner::{cairo_dry_run, dry_run::DryRunResult, input::dry_run::DryRunnerProgramInput},
+    constant::DRY_CAIRO_RUN_OUTPUT_FILE,
+    primitives::{
+        processed_types::{block_proofs::ProcessedBlockProofs, cairo_format},
+        task::ExtendedModule,
+        ChainId,
+    },
+    provider::{key::categorize_fetch_keys, traits::new_provider_from_config},
+};
+
+use super::compile::{config::CompilerConfig, CompileError};
+
+#[derive(Debug, Default, PartialEq)]
+pub struct ModuleCompilationResult {
+    /// results of tasks
+    pub task_results: Vec<U256>,
+    /// proofs
+    pub proofs: HashMap<ChainId, ProcessedBlockProofs>,
+}
+
+pub async fn module_compile(
+    task: ExtendedModule,
+    compile_config: &CompilerConfig,
+) -> Result<ModuleCompilationResult, CompileError> {
+    // Log the target task for debugging purposes
+    info!("target task: {:#?}", task);
+    let dry_run_program_path = compile_config.dry_run_program_path.clone();
+
+    // Generate input for the dry run based on the extended modules
+    let dry_run_input = DryRunnerProgramInput::new(
+        PathBuf::from(DRY_CAIRO_RUN_OUTPUT_FILE),
+        vec![cairo_format::DryRunProcessedModule::new(
+            task.task.inputs,
+            task.module_class,
+        )],
+    );
+    let input_string =
+        serde_json::to_string_pretty(&dry_run_input).expect("Failed to serialize module class");
+
+    // 2. Run the dry run and retrieve the fetch points
+    info!("2. Running dry-run... ");
+    let dry_run_results: DryRunResult = cairo_dry_run(
+        dry_run_program_path,
+        input_string,
+        compile_config.save_fetch_keys_file.clone(),
+    )?;
+
+    // TODO: this check can probably be dropped once the datalake path is deprecated
+    // Check that the program hash matches the expected hash
+    if dry_run_results[0].program_hash != task.task.program_hash {
+        return Err(CompileError::ClassHashMismatch);
+    }
+    // Ensure only one module is supported
+    if dry_run_results.len() != 1 {
+        panic!("Multiple Modules are not supported");
+    }
+
+    // Extract the dry run module result
+    let dry_run_module = dry_run_results.into_iter().next().unwrap();
+    let task_results = vec![dry_run_module.result.into()];
+
+    // 3. Categorize fetch keys by chain ID
+    let categorized_keys = categorize_fetch_keys(dry_run_module.fetch_keys);
+
+    let mut proofs_map: std::collections::HashMap<ChainId, ProcessedBlockProofs> = HashMap::new();
+    info!("3. Fetching proofs from provider...");
+    // Loop through each chain ID and fetch proofs
+    for (chain_id, keys) in categorized_keys {
+        info!("target provider chain id: {}", chain_id);
+        let target_provider_config = compile_config
+            .provider_config
+            .get(&chain_id)
+            .expect("target task's chain has not been configured");
+        let provider = new_provider_from_config(target_provider_config);
+        let results = provider.fetch_proofs_from_keys(keys).await?;
+        // Update the maps with fetched results
+        proofs_map.insert(chain_id, results);
+    }
+
+    Ok(ModuleCompilationResult {
+        task_results,
+        proofs: proofs_map,
+    })
+}
diff --git a/hdp/src/primitives/processed_types/block_proofs.rs b/hdp/src/primitives/processed_types/block_proofs.rs
index 19f708bf..494b1adb 100644
--- a/hdp/src/primitives/processed_types/block_proofs.rs
+++ b/hdp/src/primitives/processed_types/block_proofs.rs
@@ -35,6 +35,21 @@ impl ProcessedBlockProofs {
             ProcessedBlockProofs::StarkNet(starknet) => Some(starknet),
         }
     }
+
+    pub fn get_mmr_meta(self) -> Vec<MMRMeta> {
+        match self {
+            ProcessedBlockProofs::Evm(evm_proofs) => evm_proofs
+                .mmr_with_headers
+                .into_iter()
+                .map(|m| m.mmr_meta)
+                .collect(),
+            ProcessedBlockProofs::StarkNet(sn_proofs) => sn_proofs
+                .mmr_with_headers
+                .into_iter()
+                .map(|m| m.mmr_meta)
+                .collect(),
+        }
+    }
 }
 
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq, Hash)]
@@ -42,9 +57,6 @@ pub struct StarkNetBlockProofs {
     pub chain_id: String,
     pub mmr_with_headers: Vec,
     pub storages: Vec,
-    // Since accounts, transactions, and transaction_receipts do not exist for StarkNet,
-    // we omit them or include any StarkNet-specific fields if necessary.
-    // Add any StarkNet-specific fields here.
} #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq, Hash)] diff --git a/hdp/src/primitives/processed_types/cairo_format/block_proofs.rs b/hdp/src/primitives/processed_types/cairo_format/block_proofs.rs index 04dd27fd..9d97f4c4 100644 --- a/hdp/src/primitives/processed_types/cairo_format/block_proofs.rs +++ b/hdp/src/primitives/processed_types/cairo_format/block_proofs.rs @@ -1,7 +1,8 @@ use serde::{Deserialize, Serialize}; use crate::primitives::processed_types::{ - block_proofs::ProcessedBlockProofs as BaseProcessedBlockProofs, mmr::MMRMeta, + block_proofs::{ProcessedBlockProofs as BaseProcessedBlockProofs, StarkNetBlockProofs}, + mmr::MMRMeta, }; use super::{ @@ -13,47 +14,58 @@ impl AsCairoFormat for BaseProcessedBlockProofs { type Output = ProcessedBlockProofs; fn as_cairo_format(&self) -> Self::Output { - let evm_proof = self.clone().get_evm_proofs().unwrap(); - ProcessedBlockProofs { - chain_id: evm_proof.chain_id, - mmr_with_headers: evm_proof - .mmr_with_headers - .iter() - .map(|mmr_with_header| MMRWithHeader { - mmr_meta: mmr_with_header.mmr_meta.clone(), - headers: mmr_with_header - .headers - .iter() - .map(|header| header.as_cairo_format()) - .collect(), - }) - .collect(), - accounts: evm_proof - .accounts - .iter() - .map(|account| account.as_cairo_format()) - .collect(), - storages: evm_proof - .storages - .iter() - .map(|storage| storage.as_cairo_format()) - .collect(), - transactions: evm_proof - .transactions - .iter() - .map(|transaction| transaction.as_cairo_format()) - .collect(), - transaction_receipts: evm_proof - .transaction_receipts - .iter() - .map(|receipt| receipt.as_cairo_format()) - .collect(), + match self { + BaseProcessedBlockProofs::Evm(evm_proof) => ProcessedBlockProofs::Evm(EvmBlockProofs { + chain_id: evm_proof.chain_id.clone(), + mmr_with_headers: evm_proof + .mmr_with_headers + .iter() + .map(|mmr_with_header| MMRWithHeader { + mmr_meta: mmr_with_header.mmr_meta.clone(), + headers: mmr_with_header + .headers + .iter() + .map(|header| header.as_cairo_format()) + .collect(), + }) + .collect(), + accounts: evm_proof + .accounts + .iter() + .map(|account| account.as_cairo_format()) + .collect(), + storages: evm_proof + .storages + .iter() + .map(|storage| storage.as_cairo_format()) + .collect(), + transactions: evm_proof + .transactions + .iter() + .map(|transaction| transaction.as_cairo_format()) + .collect(), + transaction_receipts: evm_proof + .transaction_receipts + .iter() + .map(|receipt| receipt.as_cairo_format()) + .collect(), + }), + BaseProcessedBlockProofs::StarkNet(sn_proofs) => { + ProcessedBlockProofs::StarkNet(sn_proofs.clone()) + } } } } #[derive(Serialize, Deserialize)] -pub struct ProcessedBlockProofs { +#[serde(untagged)] +pub enum ProcessedBlockProofs { + Evm(EvmBlockProofs), + StarkNet(StarkNetBlockProofs), +} + +#[derive(Serialize, Deserialize)] +pub struct EvmBlockProofs { pub chain_id: String, pub mmr_with_headers: Vec, pub accounts: Vec, diff --git a/hdp/src/primitives/processed_types/query.rs b/hdp/src/primitives/processed_types/query.rs index f48eec20..db6f5fe0 100644 --- a/hdp/src/primitives/processed_types/query.rs +++ b/hdp/src/primitives/processed_types/query.rs @@ -62,8 +62,7 @@ impl ProcessorInput { let mmr_metas: Vec = self .proofs .iter() - .flat_map(|x| x.clone().get_evm_proofs().unwrap().mmr_with_headers) - .map(|mmr_with_header| mmr_with_header.mmr_meta.clone()) + .flat_map(|x| x.clone().get_mmr_meta()) .collect(); ProcessorOutput::new(
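
One detail from the cairo_format change above that is easy to miss: the new ProcessedBlockProofs enum is annotated with #[serde(untagged)], so the EVM and StarkNet variants serialize as the bare inner object rather than as a tagged wrapper, and the Cairo-side input keeps the same flat shape it had before the enum existed. The following standalone sketch uses toy stand-in types, not hdp's real structs, and only assumes serde (with the derive feature) and serde_json; it just shows what untagged serialization looks like on the wire.

use serde::{Deserialize, Serialize};

// Toy stand-ins for the EVM / StarkNet proof payloads (hypothetical fields).
#[derive(Serialize, Deserialize, Debug)]
struct EvmProofs {
    chain_id: String,
    accounts: Vec<String>,
}

#[derive(Serialize, Deserialize, Debug)]
struct StarkNetProofs {
    chain_id: String,
    storages: Vec<String>,
}

// Mirrors the shape of the new enum: untagged means no variant name in the JSON.
#[derive(Serialize, Deserialize, Debug)]
#[serde(untagged)]
enum BlockProofs {
    Evm(EvmProofs),
    StarkNet(StarkNetProofs),
}

fn main() {
    let sn = BlockProofs::StarkNet(StarkNetProofs {
        chain_id: "SN_SEPOLIA".to_string(),
        storages: vec!["0xabc".to_string()],
    });
    // Prints {"chain_id":"SN_SEPOLIA","storages":["0xabc"]} with no enum tag,
    // so a consumer that expects a flat proofs object keeps working.
    println!("{}", serde_json::to_string(&sn).unwrap());

    // On deserialization, serde tries the variants in declaration order and picks
    // the first one that matches, so the two payload shapes must stay distinguishable.
    let parsed: BlockProofs =
        serde_json::from_str(r#"{"chain_id":"1","accounts":["0xdead"]}"#).unwrap();
    println!("{:?}", parsed);
}

The practical consequence of this design choice is that adding the StarkNet variant did not change the JSON emitted for existing EVM-only inputs; the trade-off is that variant resolution now depends on the field sets of the two structs staying distinct.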