Skip to content
This repository has been archived by the owner on Jul 5, 2024. It is now read-only.

Commit

Permalink
Bytecode, and a collection of bytecodes. (#1529)
Browse files Browse the repository at this point in the history
### Description

This PR revisits how bytecode is handled across the codebase. It reduces
manual manipulation of bytes in favor of a more streamlined API built on
eth_types::Bytecode and bus_mapping::CodeDB.

### Issue Link

Follow-up for work that was missed in #1391.

### Type of change

Refactor

### Contents

- Removed the witness::bytecode module.
- Extended eth_types::Bytecode and bus_mapping::CodeDB:
  - eth_types::Bytecode handles a single bytecode instance, and
  - CodeDB handles a group of bytecode instances.
- Rewrote the bytecode circuit witness assignment.
- Rewrote the bytecode circuit tests so that the overwriting logic is no
longer included in the production code.
- Deduplicated the Mock account testing code.

### Rationale

- I kept the scope of the change small to reduce the review burden.
- I will add more rationale in review comments.
  • Loading branch information
ChihChengLiang authored Jul 24, 2023
1 parent 6e69b9a commit 8a633f7
Show file tree
Hide file tree
Showing 38 changed files with 566 additions and 996 deletions.
8 changes: 4 additions & 4 deletions bus-mapping/src/circuit_input_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ impl CircuitInputBuilder<DynamicCParams> {
// Compute subcircuits parameters
let c_params = {
let max_txs = eth_block.transactions.len();
let max_bytecode = self.code_db.0.values().fold(0, |acc, a| acc + a.len() + 1);
let max_bytecode = self.code_db.num_rows_required_for_bytecode_table();

let max_calldata = eth_block
.transactions
Expand Down Expand Up @@ -437,8 +437,8 @@ pub fn keccak_inputs(block: &Block, code_db: &CodeDB) -> Result<Vec<Vec<u8>>, Er
let txs: Vec<geth_types::Transaction> = block.txs.iter().map(|tx| tx.deref().clone()).collect();
keccak_inputs.extend_from_slice(&keccak_inputs_tx_circuit(&txs, block.chain_id.as_u64())?);
// Bytecode Circuit
for bytecode in code_db.0.values() {
keccak_inputs.push(bytecode.clone());
for bytecode in code_db.clone().into_iter() {
keccak_inputs.push(bytecode.code());
}
// EVM Circuit
keccak_inputs.extend_from_slice(&block.sha3_inputs);
Expand Down Expand Up @@ -575,7 +575,7 @@ pub fn build_state_code_db(
)
}

let mut code_db = CodeDB::new();
let mut code_db = CodeDB::default();
for (_address, code) in codes {
code_db.insert(code.clone());
}
Expand Down
12 changes: 3 additions & 9 deletions bus-mapping/src/circuit_input_builder/input_state_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -614,9 +614,8 @@ impl<'a> CircuitInputStateRef<'a> {
/// Fetch and return code for the given code hash from the code DB.
pub fn code(&self, code_hash: H256) -> Result<Vec<u8>, Error> {
self.code_db
.0
.get(&code_hash)
.cloned()
.get_from_h256(&code_hash)
.map(|bytecode| bytecode.code())
.ok_or(Error::CodeNotFound(code_hash))
}

Expand Down Expand Up @@ -1448,12 +1447,7 @@ impl<'a> CircuitInputStateRef<'a> {
let mut copy_steps = Vec::with_capacity(bytes_left as usize);
for idx in 0..bytes_left {
let addr = src_addr.checked_add(idx).unwrap_or(src_addr_end);
let step = if addr < src_addr_end {
let code = bytecode.code.get(addr as usize).unwrap();
(code.value, code.is_code)
} else {
(0, false)
};
let step = bytecode.get(addr as usize).unwrap_or_default();
copy_steps.push(step);
self.memory_write(exec_step, (dst_addr + idx).into(), step.0)?;
}
Expand Down
123 changes: 19 additions & 104 deletions bus-mapping/src/circuit_input_builder/tracer_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,17 +364,8 @@ fn tracer_err_address_collision() {
};

let mut code_b = Bytecode::default();
// pad code_creator to multiple of 32 bytes
let len = code_creator.to_vec().len();
let code_creator: Vec<u8> = code_creator
.to_vec()
.iter()
.cloned()
.chain(0u8..((32 - len % 32) as u8))
.collect();
for (index, word) in code_creator.chunks(32).enumerate() {
code_b.op_mstore(index * 32, Word::from_big_endian(word));
}
code_b.store_code_to_mem(&code_creator);
let len = code_creator.codesize();
let code_b_end = bytecode! {
PUSH3(0x123456) // salt
PUSH1(len) // length
Expand Down Expand Up @@ -486,17 +477,8 @@ fn tracer_create_collision_free() {
};

let mut code_b = Bytecode::default();
// pad code_creator to multiple of 32 bytes
let len = code_creator.to_vec().len();
let code_creator: Vec<u8> = code_creator
.to_vec()
.iter()
.cloned()
.chain(0u8..((32 - len % 32) as u8))
.collect();
for (index, word) in code_creator.chunks(32).enumerate() {
code_b.op_mstore(index * 32, Word::from_big_endian(word));
}
code_b.store_code_to_mem(&code_creator);
let len = code_creator.codesize();
let code_b_end = bytecode! {
PUSH1(len) // length
PUSH1(0x00) // offset
Expand Down Expand Up @@ -621,19 +603,9 @@ fn tracer_err_code_store_out_of_gas() {
};

let mut code_b = Bytecode::default();
// pad code_creator to multiple of 32 bytes
let len = code_creator.to_vec().len();
let code_creator: Vec<u8> = code_creator
.to_vec()
.iter()
.cloned()
.chain(0..(32 - len % 32) as u8)
.collect();
for (index, word) in code_creator.chunks(32).enumerate() {
code_b.op_mstore(index * 32, Word::from_big_endian(word));
}
code_b.store_code_to_mem(&code_creator);
let code_b_end = bytecode! {
PUSH32(len) // length
PUSH32(code_creator.codesize()) // length
PUSH1(0x00) // offset
PUSH1(0x00) // value
CREATE
Expand Down Expand Up @@ -769,17 +741,8 @@ fn tracer_err_invalid_code_for_create_opcode() {
};

let mut code_b = Bytecode::default();
// pad code_creator to multiple of 32 bytes
let len = code_creator.to_vec().len();
let code_creator: Vec<u8> = code_creator
.to_vec()
.iter()
.cloned()
.chain(0u8..((32 - len % 32) as u8))
.collect();
for (index, word) in code_creator.chunks(32).enumerate() {
code_b.op_mstore(index * 32, Word::from_big_endian(word));
}
code_b.store_code_to_mem(&code_creator);
let len = code_creator.codesize();
let code_b_end = bytecode! {
PUSH1(len) // length
PUSH1(0x00) // offset
Expand Down Expand Up @@ -921,19 +884,9 @@ fn tracer_err_max_code_size_exceeded() {
};

let mut code_b = Bytecode::default();
// pad code_creator to multiple of 32 bytes
let len = code_creator.to_vec().len();
let code_creator: Vec<u8> = code_creator
.to_vec()
.iter()
.cloned()
.chain(0u8..((32 - len % 32) as u8))
.collect();
for (index, word) in code_creator.chunks(32).enumerate() {
code_b.op_mstore(index * 32, Word::from_big_endian(word));
}
code_b.store_code_to_mem(&code_creator);
let code_b_end = bytecode! {
PUSH32(len) // length
PUSH32(code_creator.codesize()) // length
PUSH1(0x00) // offset
PUSH1(0x00) // value
CREATE
Expand Down Expand Up @@ -1059,19 +1012,9 @@ fn tracer_create_stop() {
};

let mut code_b = Bytecode::default();
// pad code_creator to multiple of 32 bytes
let len = code_creator.to_vec().len();
let code_creator: Vec<u8> = code_creator
.to_vec()
.iter()
.cloned()
.chain(0u8..((32 - len % 32) as u8))
.collect();
for (index, word) in code_creator.chunks(32).enumerate() {
code_b.op_mstore(index * 32, Word::from_big_endian(word));
}
code_b.store_code_to_mem(&code_creator);
let code_b_end = bytecode! {
PUSH1(len) // length
PUSH1(code_creator.codesize()) // length
PUSH1(0x00) // offset
PUSH1(0x00) // value
CREATE
Expand Down Expand Up @@ -1760,20 +1703,10 @@ fn create2_address() {
};

let mut code_b = Bytecode::default();
// pad code_creator to multiple of 32 bytes
let len = code_creator.to_vec().len();
let code_creator: Vec<u8> = code_creator
.to_vec()
.iter()
.cloned()
.chain(0u8..((32 - len % 32) as u8))
.collect();
for (index, word) in code_creator.chunks(32).enumerate() {
code_b.op_mstore(index * 32, Word::from_big_endian(word));
}
code_b.store_code_to_mem(&code_creator);
let code_b_end = bytecode! {
PUSH3(0x123456) // salt
PUSH1(len) // length
PUSH1(code_creator.codesize()) // length
PUSH1(0x00) // offset
PUSH1(0x00) // value
CREATE2
Expand Down Expand Up @@ -1853,17 +1786,8 @@ fn create_address() {
};

let mut code_b = Bytecode::default();
// pad code_creator to multiple of 32 bytes
let len = code_creator.to_vec().len();
let code_creator: Vec<u8> = code_creator
.to_vec()
.iter()
.cloned()
.chain(0u8..((32 - len % 32) as u8))
.collect();
for (index, word) in code_creator.chunks(32).enumerate() {
code_b.op_mstore(index * 32, Word::from_big_endian(word));
}
code_b.store_code_to_mem(&code_creator);
let len = code_creator.codesize();
// We do CREATE 2 times to use a nonce != 0 in the second one.
let code_b_end = bytecode! {
PUSH1(len) // length
Expand Down Expand Up @@ -2168,19 +2092,10 @@ fn test_gen_access_trace_create_push_call_stack() {
};

let mut code_b = Bytecode::default();
// pad code_creator to multiple of 32 bytes
let len = code_creator.to_vec().len();
let code_creator: Vec<u8> = code_creator
.to_vec()
.iter()
.cloned()
.chain(0u8..((32 - len % 32) as u8))
.collect();
for (index, word) in code_creator.chunks(32).enumerate() {
code_b.op_mstore(index * 32, Word::from_big_endian(word));
}
code_b.store_code_to_mem(&code_creator);

let code_b_end = bytecode! {
PUSH1(len) // length
PUSH1(code_creator.codesize()) // length
PUSH1(0x00) // offset
PUSH1(0x00) // value
CREATE
Expand Down
24 changes: 8 additions & 16 deletions bus-mapping/src/evm/opcodes/codecopy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ fn gen_copy_event(

let code_hash = state.call()?.code_hash;
let bytecode: Bytecode = state.code(code_hash)?.into();
let code_size = bytecode.code.len() as u64;
let code_size = bytecode.codesize() as u64;

// Get low Uint64 of offset to generate copy steps. Since offset could be
// Uint64 overflow if length is zero.
Expand Down Expand Up @@ -129,7 +129,6 @@ mod codecopy_tests {
circuit_input_builder::{CopyDataType, ExecState, NumberOrHash},
mock::BlockData,
operation::{MemoryOp, StackOp, RW},
state_db::CodeDB,
};

#[test]
Expand Down Expand Up @@ -202,11 +201,7 @@ mod codecopy_tests {
MemoryOp::new(
1,
MemoryAddress::from(dst_offset + idx),
if code_offset + idx < code.to_vec().len() {
code.to_vec()[code_offset + idx]
} else {
0
},
code.get_byte(code_offset + idx).unwrap_or(0),
),
)
})
Expand All @@ -216,12 +211,9 @@ mod codecopy_tests {
let copy_events = builder.block.copy_events.clone();
assert_eq!(copy_events.len(), 1);
assert_eq!(copy_events[0].bytes.len(), size);
assert_eq!(
copy_events[0].src_id,
NumberOrHash::Hash(CodeDB::hash(&code.to_vec()))
);
assert_eq!(copy_events[0].src_id, NumberOrHash::Hash(code.hash_h256()));
assert_eq!(copy_events[0].src_addr as usize, code_offset);
assert_eq!(copy_events[0].src_addr_end as usize, code.to_vec().len());
assert_eq!(copy_events[0].src_addr_end as usize, code.codesize());
assert_eq!(copy_events[0].src_type, CopyDataType::Bytecode);
assert_eq!(
copy_events[0].dst_id,
Expand All @@ -231,10 +223,10 @@ mod codecopy_tests {
assert_eq!(copy_events[0].dst_type, CopyDataType::Memory);
assert!(copy_events[0].log_id.is_none());

for (idx, (value, is_code)) in copy_events[0].bytes.iter().enumerate() {
let bytecode_element = code.get(code_offset + idx).unwrap_or_default();
assert_eq!(*value, bytecode_element.value);
assert_eq!(*is_code, bytecode_element.is_code);
for (idx, &(value, is_code)) in copy_events[0].bytes.iter().enumerate() {
let (true_value, true_is_code) = code.get(code_offset + idx).unwrap_or_default();
assert_eq!(value, true_value);
assert_eq!(is_code, true_is_code);
}
}
}
2 changes: 1 addition & 1 deletion bus-mapping/src/evm/opcodes/codesize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ mod codesize_tests {
STOP
};
code.append(&tail);
let codesize = code.to_vec().len();
let codesize = code.codesize();

let block: GethData = TestContext::<2, 1>::new(
None,
Expand Down
10 changes: 4 additions & 6 deletions bus-mapping/src/evm/opcodes/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,12 +291,10 @@ fn handle_copy(
length: usize,
) -> Result<(Vec<u8>, H256), Error> {
let initialization_bytes = state.caller_ctx()?.memory.0[offset..(offset + length)].to_vec();
let code_hash = CodeDB::hash(&initialization_bytes);
let bytes: Vec<_> = Bytecode::from(initialization_bytes.clone())
.code
.iter()
.map(|element| (element.value, element.is_code))
.collect();

let initialization = Bytecode::from(initialization_bytes.clone());
let code_hash = initialization.hash_h256();
let bytes = initialization.code_vec();

let rw_counter_start = state.block_ctx.rwc;
for (i, (byte, _)) in bytes.iter().enumerate() {
Expand Down
13 changes: 4 additions & 9 deletions bus-mapping/src/evm/opcodes/extcodecopy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ fn gen_copy_event(
} else {
Bytecode::default()
};
let code_size = bytecode.code.len() as u64;
let code_size = bytecode.codesize() as u64;

// Get low Uint64 of offset to generate copy steps. Since offset could be
// Uint64 overflow if length is zero.
Expand Down Expand Up @@ -410,11 +410,7 @@ mod extcodecopy_tests {
MemoryOp::new(
expected_call_id,
MemoryAddress::from(memory_offset + idx),
if data_offset + idx < bytecode_ext.to_vec().len() {
bytecode_ext.to_vec()[data_offset + idx]
} else {
0
},
bytecode_ext.get_byte(data_offset + idx).unwrap_or(0),
),
)
})
Expand All @@ -436,10 +432,9 @@ mod extcodecopy_tests {
assert_eq!(copy_events[0].dst_type, CopyDataType::Memory);
assert!(copy_events[0].log_id.is_none());

for (idx, (value, is_code)) in copy_events[0].bytes.iter().enumerate() {
for (idx, &(value, is_code)) in copy_events[0].bytes.iter().enumerate() {
let bytecode_element = bytecode_ext.get(idx).unwrap_or_default();
assert_eq!(*value, bytecode_element.value);
assert_eq!(*is_code, bytecode_element.is_code);
assert_eq!((value, is_code), bytecode_element);
}
}

Expand Down
10 changes: 3 additions & 7 deletions bus-mapping/src/evm/opcodes/return_revert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,14 +207,10 @@ fn handle_create(
source: Source,
) -> Result<H256, Error> {
let values = state.call_ctx()?.memory.0[source.offset..source.offset + source.length].to_vec();
let code_hash = CodeDB::hash(&values);
let bytecode = Bytecode::from(values);
let code_hash = bytecode.hash_h256();
let bytes = bytecode.code_vec();
let dst_id = NumberOrHash::Hash(code_hash);
let bytes: Vec<_> = Bytecode::from(values)
.code
.iter()
.map(|element| (element.value, element.is_code))
.collect();

let rw_counter_start = state.block_ctx.rwc;
for (i, (byte, _)) in bytes.iter().enumerate() {
state.push_op(
Expand Down
2 changes: 1 addition & 1 deletion bus-mapping/src/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ impl<C: CircuitsParams> BlockData<C> {

fn init_dbs(geth_data: &GethData) -> (StateDB, CodeDB) {
let mut sdb = StateDB::new();
let mut code_db = CodeDB::new();
let mut code_db = CodeDB::default();

let access_set = get_state_accesses(&geth_data.eth_block, &geth_data.geth_traces)
.expect("state accesses");
Expand Down
Loading

0 comments on commit 8a633f7

Please sign in to comment.