Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: concretize calldatacopy #364

Merged
merged 12 commits into from
Sep 18, 2024
44 changes: 43 additions & 1 deletion src/halmos/bytevec.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Any, ForwardRef

from sortedcontainers import SortedDict
from z3 import BitVecRef, If, eq, is_bool, is_bv, is_bv_value, simplify
from z3 import BitVecRef, If, eq, is_bool, is_bv, is_bv_value, simplify, substitute

from .utils import (
Byte,
Expand Down Expand Up @@ -142,6 +142,33 @@ def __getitem__(self, key) -> Byte | ForwardRef("Chunk"):
def __len__(self) -> int:
return self.length

def concretize(self, substitution: dict[BitVecRef, BitVecRef]) -> "Chunk":
"""
Replace all symbols in the data with their corresponding concrete values,
if they exist in the given substitution mapping.

Return a new object with concrete values, or self if no substitution is made.
"""

if not isinstance(self, SymbolicChunk):
return self

data = self.data
# TODO: call substitute() only when the data contains variables to be substituted
new_data = substitute(data, *substitution.items())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am slightly worried about the perf impact of substitute always constructing a new object, but I guess we'll see how it looks like when I profile (not a blocker)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, proper profiling would be greatly helpful. based on my performance tests, it gets slower if data is a complex term, e.g., LibUint1024Test:testProveSubAdd().

i'm thinking applying this substitution to selected int_of() only, rather than blindly applying to all calldatacopy. would be a separate pr though.


# NOTE: `new_data is data` doesn't work, because substitute() always returns a new object even if no substitution is made
if eq(new_data, data):
return self

new_data = simplify(new_data)

return (
ConcreteChunk(bv_value_to_bytes(new_data), self.start, self.length)
if is_bv_value(new_data)
else SymbolicChunk(new_data, self.start, self.length)
)

@abstractmethod
def get_byte(self, offset) -> Byte:
raise NotImplementedError
Expand Down Expand Up @@ -610,6 +637,21 @@ def set_word(self, offset: int, value: Word) -> None:

self.set_slice(offset, offset + 32, value)

def concretize(self, substitution: dict[BitVecRef, BitVecRef]) -> "ByteVec":
"""
Replace all symbols in the chunks with their corresponding concrete values,
if they exist in the given substitution mapping.

Return a new ByteVec object even if no substitution is made.
"""

result = ByteVec()

for chunk in self.chunks.values():
result.append(chunk.concretize(substitution))

return result

### read operations

def slice(self, start: int, stop: int) -> "ByteVec":
Expand Down
12 changes: 10 additions & 2 deletions src/halmos/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ class Config:

default_bytes_lengths: str = arg(
help="set the default length candidates for bytes and string not specified in --array-lengths",
global_default="0,32,1024,65", # 65 is ECDSA signature size
global_default="0,1024,65", # 65 is ECDSA signature size
metavar="LENGTH1,LENGTH2,...",
action=ParseCSV,
)
Expand Down Expand Up @@ -549,7 +549,15 @@ def parse_dict(self, parsed: dict, source: str = "halmos.toml") -> dict:
)
sys.exit(2)

return {k.replace("-", "_"): v for k, v in data.items()}
def parse_value(k, v):
if k == "default-bytes-lengths":
return ParseCSV.parse(v)
elif k == "array-lengths":
return ParseArrayLengths.parse(v)
else:
return v

return {k.replace("-", "_"): parse_value(k, v) for k, v in data.items()}
daejunpark marked this conversation as resolved.
Show resolved Hide resolved


def _create_default_config() -> "Config":
Expand Down
7 changes: 5 additions & 2 deletions src/halmos/sevm.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,8 +644,10 @@ class Concretization:
In the future, it may be used for other purposes, such as concrete hash reasoning.
"""

substitution: dict = field(default_factory=dict) # symbol -> constant
candidates: dict = field(default_factory=dict) # symbol -> set of constants
# symbol -> constant
substitution: dict[BitVecRef, BitVecRef] = field(default_factory=dict)
daejunpark marked this conversation as resolved.
Show resolved Hide resolved
# symbol -> set of constants
candidates: dict[BitVecRef, list[int]] = field(default_factory=dict)

def process_cond(self, cond):
if not is_eq(cond):
Expand Down Expand Up @@ -2864,6 +2866,7 @@ def finalize(ex: Exec):

if size > 0:
data: ByteVec = ex.calldata().slice(offset, offset + size)
data = data.concretize(ex.path.concretization.substitution)
ex.st.memory.set_slice(loc, end_loc, data)

elif opcode == EVM.CODECOPY:
Expand Down
Loading