From 0538a42ecb4b2177b410aa74925735cf8502d610 Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Fri, 4 Aug 2023 18:11:33 -0700 Subject: [PATCH 1/5] feat: support struct type --- src/halmos/__main__.py | 84 +++-------------- src/halmos/calldata.py | 203 ++++++++++++++++++++++++++++++++++++++++ tests/expected/all.json | 20 +++- tests/test/Struct.t.sol | 16 +++- 4 files changed, 249 insertions(+), 74 deletions(-) create mode 100644 src/halmos/calldata.py diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index a2a05617..cabf3b5d 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -18,6 +18,7 @@ from .utils import color_good, color_warn, hexify from .warnings import * from .parser import mk_arg_parser +from .calldata import Calldata arg_parser = mk_arg_parser() @@ -42,8 +43,9 @@ def str_tuple(args: List) -> str: ret = [] for arg in args: typ = arg["type"] - if typ == "tuple": - ret.append(str_tuple(arg["components"])) + match = re.search(r"^tuple((\[[0-9]*\])*)$", typ) + if match: + ret.append(str_tuple(arg["components"]) + match.group(1)) else: ret.append(typ) return "(" + ",".join(ret) + ")" @@ -72,74 +74,16 @@ def mk_calldata( dyn_param_size: List[str], args: Namespace, ) -> None: - item = find_abi(abi, fun_info) - tba = [] - offset = 0 - for param in item["inputs"]: - param_name = param["name"] - param_type = param["type"] - if param_type == "tuple": - # TODO: support struct types - raise NotImplementedError(f"Not supported parameter type: {param_type}") - elif param_type == "bytes" or param_type == "string": - # wstore(cd, 4+offset, 32, BitVecVal(, 256)) - tba.append((4 + offset, param)) - offset += 32 - elif param_type.endswith("[]"): - raise NotImplementedError(f"Not supported dynamic arrays: {param_type}") - else: - match = re.search( - r"(u?int[0-9]*|address|bool|bytes[0-9]+)(\[([0-9]+)\])?", param_type - ) - if not match: - raise NotImplementedError(f"Unknown parameter type: {param_type}") - typ = match.group(1) - dim = match.group(3) - if dim: # array - for idx in range(int(dim)): - wstore( - cd, 4 + offset, 32, BitVec(f"p_{param_name}[{idx}]_{typ}", 256) - ) - offset += 32 - else: # primitive - wstore(cd, 4 + offset, 32, BitVec(f"p_{param_name}_{typ}", 256)) - offset += 32 - - arrlen = mk_arrlen(args) - for loc_param in tba: - loc = loc_param[0] - param = loc_param[1] - param_name = param["name"] - param_type = param["type"] - - if param_name not in arrlen: - size = args.loop - if args.debug: - print( - f"Warning: no size provided for {param_name}; default value {size} will be used." - ) - else: - size = arrlen[param_name] - - dyn_param_size.append(f"|{param_name}|={size}") - - if param_type == "bytes" or param_type == "string": - # head - wstore(cd, loc, 32, BitVecVal(offset, 256)) - # tail - size_pad_right = int((size + 31) / 32) * 32 - wstore(cd, 4 + offset, 32, BitVecVal(size, 256)) - offset += 32 - if size_pad_right > 0: - wstore( - cd, - 4 + offset, - size_pad_right, - BitVec(f"p_{param_name}_{param_type}", 8 * size_pad_right), - ) - offset += size_pad_right - else: - raise ValueError(param_type) + # find function abi + fun_abi = find_abi(abi, fun_info) + + # generate symbolic ABI calldata + calldata = Calldata(args, mk_arrlen(args), dyn_param_size) + result = calldata.create(fun_abi) + + if result is not None: + # TODO: use Contract abstraction for calldata + wstore(cd, 4, result.size() // 8, result) def mk_callvalue() -> Word: diff --git a/src/halmos/calldata.py b/src/halmos/calldata.py new file mode 100644 index 00000000..52a9cb1a --- /dev/null +++ b/src/halmos/calldata.py @@ -0,0 +1,203 @@ +# SPDX-License-Identifier: AGPL-3.0 + +import re + +from dataclasses import dataclass +from typing import List, Dict +from argparse import Namespace +from functools import reduce + +from z3 import * + +from .sevm import con, concat + + +@dataclass(frozen=True) +class Type: + var: str + + +@dataclass(frozen=True) +class BaseType(Type): + typ: str + + +@dataclass(frozen=True) +class FixedArrayType(Type): + base: Type + size: int + + +@dataclass(frozen=True) +class DynamicArrayType(Type): + base: Type + + +@dataclass(frozen=True) +class TupleType(Type): + items: List[Type] + + +def parse_type(var: str, typ: str, item: Dict) -> Type: + """Parse ABI type in JSON format""" + + # parse array type + match = re.search(r"^(.*)(\[([0-9]*)\])$", typ) + if match: + base_type = match.group(1) + array_len = match.group(3) + + # recursively parse base type + base = parse_type("", base_type, item) + + if array_len == "": # dynamic array + return DynamicArrayType(var, base) + else: + return FixedArrayType(var, base, int(array_len)) + + # check supported type + match = re.search(r"^(u?int[0-9]*|address|bool|bytes[0-9]*|string|tuple)$", typ) + if not match: + # TODO: support fixedMxN, ufixedMxN, function types + raise NotImplementedError(f"Not supported type: {typ}") + + # parse tuple type + if typ == "tuple": + return parse_tuple_type(var, item["components"]) + + # parse primitive types + return BaseType(var, typ) + + +def parse_tuple_type(var: str, items: List[Dict]) -> Type: + return TupleType(var, [parse_type(item["name"], item["type"], item) for item in items]) + + +@dataclass(frozen=True) +class EncodingResult: + data: List[BitVecRef] + size: int + static: bool + + +class Calldata: + args: Namespace + arrlen: Dict[str, int] + dyn_param_size: List[str] # to be updated + + + def __init__(self, args: Namespace, arrlen: Dict[str, int], dyn_param_size: List[str]) -> None: + self.args = args + self.arrlen = arrlen + self.dyn_param_size = dyn_param_size + + + def choose_array_len(self, name: str) -> int: + if name in self.arrlen: + array_len = self.arrlen[name] + else: + array_len = self.args.loop + if self.args.debug: + print( + f"Warning: no size provided for {name}; default value {array_len} will be used." + ) + + self.dyn_param_size.append(f"|{name}|={array_len}") + + return array_len + + + def create(self, abi: Dict) -> BitVecRef: + """Create calldata of ABI type""" + + # list of parameter types + tuple_type = parse_tuple_type("", abi["inputs"]) + + # no parameters + if len(tuple_type.items) == 0: + return None + + # ABI encoded symbolic calldata for parameters + encoded = self.encode("", tuple_type) + result = concat(encoded.data) + + # sanity check + if result.size() != 8 * encoded.size: + raise ValueError(encoded) + + return result + + + def encode(self, name: str, typ: Type) -> EncodingResult: + """Create symbolic ABI encoded calldata + + See https://docs.soliditylang.org/en/latest/abi-spec.html + """ + + # (T1, T2, ..., Tn) + if isinstance(typ, TupleType): + prefix = f"{name}." if name else "" + items = [self.encode(f"{prefix}{item.var}", item) for item in typ.items] + return self.encode_tuple(items) + + # T[k] + if isinstance(typ, FixedArrayType): + items = [self.encode(f"{name}[{i}]", typ.base) for i in range(typ.size)] + return self.encode_tuple(items) + + # T[] + if isinstance(typ, DynamicArrayType): + array_len = self.choose_array_len(name) + items = [self.encode(f"{name}[{i}]", typ.base) for i in range(array_len)] + encoded = self.encode_tuple(items) + return EncodingResult([con(array_len)] + encoded.data, 32 + encoded.size, False) + + if isinstance(typ, BaseType): + # bytes, string + if typ.typ in ["bytes", "string"]: + size = 65 # ECDSA signature size # TODO: use args + size_pad_right = ((size + 31) // 32) * 32 + data = [con(size_pad_right), BitVec(f"p_{name}_{typ.typ}", 8 * size_pad_right)] + return EncodingResult(data, 32 + size_pad_right, False) + + # uintN, intN, address, bool, bytesN + else: + return EncodingResult([BitVec(f"p_{name}_{typ.typ}", 256)], 32, True) + + raise ValueError(typ) + + + def encode_tuple(self, items: List[EncodingResult]) -> EncodingResult: + # For X = (X(1), ..., X(k)): + # + # enc(X) = head(X(1)) ... head(X(k)) tail(X(1)) ... tail(X(k)) + # + # if Ti is static: + # head(X(i)) = enc(X(i)) + # tail(X(i)) = "" (the empty string) + # + # if Ti is dynamic: + # head(X(i)) = enc(len( head(X(1)) ... head(X(k)) tail(X(1)) ... tail(X(i-1)) )) + # tail(X(i)) = enc(X(i)) + # + # See https://docs.soliditylang.org/en/latest/abi-spec.html#formal-specification-of-the-encoding + + # compute total head size + head_size = lambda x: x.size if x.static else 32 + total_head_size = reduce(lambda s, x: s + head_size(x), items, 0) + + # generate heads and tails + total_size = total_head_size + heads, tails = [], [] + for item in items: + if item.static: + heads.extend(item.data) + else: + heads.append(con(total_size)) + tails.extend(item.data) + total_size += item.size + + # tuple is static if all elements are static + static = len(tails) == 0 + + return EncodingResult(heads + tails, total_size, static) diff --git a/tests/expected/all.json b/tests/expected/all.json index 0986b58f..beaf8ac0 100644 --- a/tests/expected/all.json +++ b/tests/expected/all.json @@ -1034,8 +1034,24 @@ "test/Struct.t.sol:StructTest": [ { "name": "check_Struct((uint256,uint256))", - "exitcode": 2, - "num_models": null, + "exitcode": 0, + "num_models": 0, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_StructArray((uint256,uint256)[],(uint256,uint256)[2])", + "exitcode": 0, + "num_models": 0, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_StructArrayArray((uint256,uint256)[][],(uint256,uint256)[3][],(uint256,uint256)[][5],(uint256,uint256)[7][9])", + "exitcode": 0, + "num_models": 0, "num_paths": null, "time": null, "num_bounded_loops": null diff --git a/tests/test/Struct.t.sol b/tests/test/Struct.t.sol index e4c8a3e6..939655bb 100644 --- a/tests/test/Struct.t.sol +++ b/tests/test/Struct.t.sol @@ -7,8 +7,20 @@ contract StructTest { uint y; } - // TODO: support struct parameter - function check_Struct(Point memory) public pure { + function check_Struct(Point memory p) public pure { + assert(true); + } + + function check_StructArray(Point[] memory p, Point[2] memory q) public pure { + assert(true); + } + + function check_StructArrayArray( + Point[][] memory p, + Point[3][] memory q, + Point[][5] memory r, + Point[7][9] memory s + ) public pure { assert(true); } } From 1852ba1d610552cab298a38795eb3501c08cda66 Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Fri, 4 Aug 2023 18:21:20 -0700 Subject: [PATCH 2/5] fix lint --- src/halmos/calldata.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/halmos/calldata.py b/src/halmos/calldata.py index 52a9cb1a..129549aa 100644 --- a/src/halmos/calldata.py +++ b/src/halmos/calldata.py @@ -70,7 +70,9 @@ def parse_type(var: str, typ: str, item: Dict) -> Type: def parse_tuple_type(var: str, items: List[Dict]) -> Type: - return TupleType(var, [parse_type(item["name"], item["type"], item) for item in items]) + return TupleType( + var, [parse_type(item["name"], item["type"], item) for item in items] + ) @dataclass(frozen=True) @@ -85,13 +87,13 @@ class Calldata: arrlen: Dict[str, int] dyn_param_size: List[str] # to be updated - - def __init__(self, args: Namespace, arrlen: Dict[str, int], dyn_param_size: List[str]) -> None: + def __init__( + self, args: Namespace, arrlen: Dict[str, int], dyn_param_size: List[str] + ) -> None: self.args = args self.arrlen = arrlen self.dyn_param_size = dyn_param_size - def choose_array_len(self, name: str) -> int: if name in self.arrlen: array_len = self.arrlen[name] @@ -106,7 +108,6 @@ def choose_array_len(self, name: str) -> int: return array_len - def create(self, abi: Dict) -> BitVecRef: """Create calldata of ABI type""" @@ -127,7 +128,6 @@ def create(self, abi: Dict) -> BitVecRef: return result - def encode(self, name: str, typ: Type) -> EncodingResult: """Create symbolic ABI encoded calldata @@ -150,14 +150,19 @@ def encode(self, name: str, typ: Type) -> EncodingResult: array_len = self.choose_array_len(name) items = [self.encode(f"{name}[{i}]", typ.base) for i in range(array_len)] encoded = self.encode_tuple(items) - return EncodingResult([con(array_len)] + encoded.data, 32 + encoded.size, False) + return EncodingResult( + [con(array_len)] + encoded.data, 32 + encoded.size, False + ) if isinstance(typ, BaseType): # bytes, string if typ.typ in ["bytes", "string"]: size = 65 # ECDSA signature size # TODO: use args size_pad_right = ((size + 31) // 32) * 32 - data = [con(size_pad_right), BitVec(f"p_{name}_{typ.typ}", 8 * size_pad_right)] + data = [ + con(size_pad_right), + BitVec(f"p_{name}_{typ.typ}", 8 * size_pad_right), + ] return EncodingResult(data, 32 + size_pad_right, False) # uintN, intN, address, bool, bytesN @@ -166,7 +171,6 @@ def encode(self, name: str, typ: Type) -> EncodingResult: raise ValueError(typ) - def encode_tuple(self, items: List[EncodingResult]) -> EncodingResult: # For X = (X(1), ..., X(k)): # From 5c6b22eb7af69a6c76815deef105dbc294ec7719 Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Fri, 4 Aug 2023 22:11:52 -0700 Subject: [PATCH 3/5] add test --- tests/expected/all.json | 24 +++++--- tests/test/Struct.t.sol | 120 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 128 insertions(+), 16 deletions(-) diff --git a/tests/expected/all.json b/tests/expected/all.json index beaf8ac0..9836571f 100644 --- a/tests/expected/all.json +++ b/tests/expected/all.json @@ -1034,24 +1034,34 @@ "test/Struct.t.sol:StructTest": [ { "name": "check_Struct((uint256,uint256))", - "exitcode": 0, - "num_models": 0, + "exitcode": 1, + "num_models": 1, "num_paths": null, "time": null, "num_bounded_loops": null }, { "name": "check_StructArray((uint256,uint256)[],(uint256,uint256)[2])", - "exitcode": 0, - "num_models": 0, + "exitcode": 1, + "num_models": 1, "num_paths": null, "time": null, "num_bounded_loops": null }, { - "name": "check_StructArrayArray((uint256,uint256)[][],(uint256,uint256)[3][],(uint256,uint256)[][5],(uint256,uint256)[7][9])", - "exitcode": 0, - "num_models": 0, + "name": "check_StructArrayArray((uint256,uint256)[][],(uint256,uint256)[2][],(uint256,uint256)[][2],(uint256,uint256)[2][2])", + "exitcode": 1, + "num_models": 1, + "num_paths": null, + "time": null, + "num_bounded_loops": null + } + ], + "test/Struct.t.sol:StructTest2": [ + { + "name": "check_S((uint256,uint256[],uint256),(uint256,(uint256,uint256[],uint256),uint256[],(uint256,uint256[],uint256)[],uint256[1],(uint256,uint256[],uint256)[][])[])", + "exitcode": 1, + "num_models": 1, "num_paths": null, "time": null, "num_bounded_loops": null diff --git a/tests/test/Struct.t.sol b/tests/test/Struct.t.sol index 939655bb..321d3684 100644 --- a/tests/test/Struct.t.sol +++ b/tests/test/Struct.t.sol @@ -1,26 +1,128 @@ // SPDX-License-Identifier: AGPL-3.0 pragma solidity >=0.8.0 <0.9.0; +/// @custom:halmos --solver-timeout-assertion 0 contract StructTest { struct Point { uint x; uint y; } - function check_Struct(Point memory p) public pure { - assert(true); + function check_Struct(Point memory p) public pure returns (uint result) { + unchecked { + result += p.x + p.y; + } + assert(result == 0); } - function check_StructArray(Point[] memory p, Point[2] memory q) public pure { - assert(true); + /// @custom:halmos --array-lengths p=1 + function check_StructArray(Point[] memory p, Point[2] memory q) public pure returns (uint result) { + for (uint i = 0; i < p.length; i++) { + unchecked { + result += p[i].x + p[i].y; + } + } + for (uint i = 0; i < q.length; i++) { + unchecked { + result += q[i].x + q[i].y; + } + } + assert(result == 0); } + /// @custom:halmos --array-lengths p=1,p[0]=1 function check_StructArrayArray( Point[][] memory p, - Point[3][] memory q, - Point[][5] memory r, - Point[7][9] memory s - ) public pure { - assert(true); + Point[2][] memory q, + Point[][2] memory r, + Point[2][2] memory s + ) public pure returns (uint result) { + for (uint i = 0; i < p.length; i++) { + for (uint j = 0; j < p[i].length; j++) { + unchecked { + result += p[i][j].x + p[i][j].y; + } + } + } + for (uint i = 0; i < q.length; i++) { + for (uint j = 0; j < q[i].length; j++) { + unchecked { + result += q[i][j].x + q[i][j].y; + } + } + } + for (uint i = 0; i < r.length; i++) { + for (uint j = 0; j < r[i].length; j++) { + unchecked { + result += r[i][j].x + r[i][j].y; + } + } + } + for (uint i = 0; i < s.length; i++) { + for (uint j = 0; j < s[i].length; j++) { + unchecked { + result += s[i][j].x + s[i][j].y; + } + } + } + assert(result == 0); + } +} + +/// @custom:halmos --solver-timeout-assertion 0 +contract StructTest2 { + struct P { + uint x; + uint[] y; + uint z; + } + + struct S { + uint f1; + P f2; + uint[] f3; + P[] f4; + uint[1] f5; + P[][] f6; + } + + /// @custom:halmos --array-lengths s=1 + function check_S(P memory p, S[] memory s) public pure returns (uint result) { + unchecked { + result += sum_P(p); + for (uint i = 0; i < s.length; i++) { + result += sum_S(s[i]); + } + } + assert(result == 0); + } + + function sum_P(P memory p) internal pure returns (uint result) { + unchecked { + result += p.x; + for (uint i = 0; i < p.y.length; i++) { + result += p.y[i]; + } + result += p.z; + } + } + + function sum_S(S memory s) internal pure returns (uint result) { + unchecked { + result += s.f1; + result += sum_P(s.f2); + for (uint i = 0; i < s.f3.length; i++) { + result += s.f3[i]; + } + for (uint i = 0; i < s.f4.length; i++) { + result += sum_P(s.f4[i]); + } + result += s.f5[0]; + for (uint i = 0; i < s.f6.length; i++) { + for (uint j = 0; j < s.f6[i].length; j++) { + result += sum_P(s.f6[i][j]); + } + } + } } } From 17fc57ae271956c03d86fc93d9ab288f0f5bacf9 Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Fri, 4 Aug 2023 22:21:26 -0700 Subject: [PATCH 4/5] cleanup --- src/halmos/__main__.py | 9 ++++++--- src/halmos/calldata.py | 5 ++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index cabf3b5d..ca13f5ee 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -77,13 +77,16 @@ def mk_calldata( # find function abi fun_abi = find_abi(abi, fun_info) + # no parameters + if len(fun_abi["inputs"]) == 0: + return + # generate symbolic ABI calldata calldata = Calldata(args, mk_arrlen(args), dyn_param_size) result = calldata.create(fun_abi) - if result is not None: - # TODO: use Contract abstraction for calldata - wstore(cd, 4, result.size() // 8, result) + # TODO: use Contract abstraction for calldata + wstore(cd, 4, result.size() // 8, result) def mk_callvalue() -> Word: diff --git a/src/halmos/calldata.py b/src/halmos/calldata.py index 129549aa..9588ec6a 100644 --- a/src/halmos/calldata.py +++ b/src/halmos/calldata.py @@ -70,9 +70,8 @@ def parse_type(var: str, typ: str, item: Dict) -> Type: def parse_tuple_type(var: str, items: List[Dict]) -> Type: - return TupleType( - var, [parse_type(item["name"], item["type"], item) for item in items] - ) + parsed_items = [parse_type(item["name"], item["type"], item) for item in items] + return TupleType(var, parsed_items) @dataclass(frozen=True) From e95903a9e7a6a35e05fbba0fdc9692dceac55523 Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Fri, 4 Aug 2023 22:27:04 -0700 Subject: [PATCH 5/5] add comment --- tests/test/Struct.t.sol | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test/Struct.t.sol b/tests/test/Struct.t.sol index 321d3684..d2cdef42 100644 --- a/tests/test/Struct.t.sol +++ b/tests/test/Struct.t.sol @@ -12,7 +12,7 @@ contract StructTest { unchecked { result += p.x + p.y; } - assert(result == 0); + assert(result == 0); // expected to fail and generate a counterexample that incorporates all calldata symbols } /// @custom:halmos --array-lengths p=1 @@ -27,7 +27,7 @@ contract StructTest { result += q[i].x + q[i].y; } } - assert(result == 0); + assert(result == 0); // expected to fail and generate a counterexample that incorporates all calldata symbols } /// @custom:halmos --array-lengths p=1,p[0]=1 @@ -65,7 +65,7 @@ contract StructTest { } } } - assert(result == 0); + assert(result == 0); // expected to fail and generate a counterexample that incorporates all calldata symbols } } @@ -94,7 +94,7 @@ contract StructTest2 { result += sum_S(s[i]); } } - assert(result == 0); + assert(result == 0); // expected to fail and generate a counterexample that incorporates all calldata symbols } function sum_P(P memory p) internal pure returns (uint result) {