From 399a7cba5ffb1d78bad62d66b143d329e35925e4 Mon Sep 17 00:00:00 2001
From: Daejun Park <daejunpark@gmail.com>
Date: Tue, 8 Aug 2023 12:08:45 -0700
Subject: [PATCH 1/2] feat: support struct type (#161)

---
 src/halmos/__main__.py  |  87 ++++-------------
 src/halmos/calldata.py  | 206 ++++++++++++++++++++++++++++++++++++++++
 tests/expected/all.json |  30 +++++-
 tests/test/Struct.t.sol | 120 ++++++++++++++++++++++-
 4 files changed, 368 insertions(+), 75 deletions(-)
 create mode 100644 src/halmos/calldata.py

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index a2a05617..ca13f5ee 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -18,6 +18,7 @@
 from .utils import color_good, color_warn, hexify
 from .warnings import *
 from .parser import mk_arg_parser
+from .calldata import Calldata
 
 arg_parser = mk_arg_parser()
 
@@ -42,8 +43,9 @@ def str_tuple(args: List) -> str:
         ret = []
         for arg in args:
             typ = arg["type"]
-            if typ == "tuple":
-                ret.append(str_tuple(arg["components"]))
+            match = re.search(r"^tuple((\[[0-9]*\])*)$", typ)
+            if match:
+                ret.append(str_tuple(arg["components"]) + match.group(1))
             else:
                 ret.append(typ)
         return "(" + ",".join(ret) + ")"
@@ -72,74 +74,19 @@ def mk_calldata(
     dyn_param_size: List[str],
     args: Namespace,
 ) -> None:
-    item = find_abi(abi, fun_info)
-    tba = []
-    offset = 0
-    for param in item["inputs"]:
-        param_name = param["name"]
-        param_type = param["type"]
-        if param_type == "tuple":
-            # TODO: support struct types
-            raise NotImplementedError(f"Not supported parameter type: {param_type}")
-        elif param_type == "bytes" or param_type == "string":
-            # wstore(cd, 4+offset, 32, BitVecVal(<?offset?>, 256))
-            tba.append((4 + offset, param))
-            offset += 32
-        elif param_type.endswith("[]"):
-            raise NotImplementedError(f"Not supported dynamic arrays: {param_type}")
-        else:
-            match = re.search(
-                r"(u?int[0-9]*|address|bool|bytes[0-9]+)(\[([0-9]+)\])?", param_type
-            )
-            if not match:
-                raise NotImplementedError(f"Unknown parameter type: {param_type}")
-            typ = match.group(1)
-            dim = match.group(3)
-            if dim:  # array
-                for idx in range(int(dim)):
-                    wstore(
-                        cd, 4 + offset, 32, BitVec(f"p_{param_name}[{idx}]_{typ}", 256)
-                    )
-                    offset += 32
-            else:  # primitive
-                wstore(cd, 4 + offset, 32, BitVec(f"p_{param_name}_{typ}", 256))
-                offset += 32
-
-    arrlen = mk_arrlen(args)
-    for loc_param in tba:
-        loc = loc_param[0]
-        param = loc_param[1]
-        param_name = param["name"]
-        param_type = param["type"]
-
-        if param_name not in arrlen:
-            size = args.loop
-            if args.debug:
-                print(
-                    f"Warning: no size provided for {param_name}; default value {size} will be used."
-                )
-        else:
-            size = arrlen[param_name]
-
-        dyn_param_size.append(f"|{param_name}|={size}")
-
-        if param_type == "bytes" or param_type == "string":
-            # head
-            wstore(cd, loc, 32, BitVecVal(offset, 256))
-            # tail
-            size_pad_right = int((size + 31) / 32) * 32
-            wstore(cd, 4 + offset, 32, BitVecVal(size, 256))
-            offset += 32
-            if size_pad_right > 0:
-                wstore(
-                    cd,
-                    4 + offset,
-                    size_pad_right,
-                    BitVec(f"p_{param_name}_{param_type}", 8 * size_pad_right),
-                )
-                offset += size_pad_right
-        else:
-            raise ValueError(param_type)
+    # find function abi
+    fun_abi = find_abi(abi, fun_info)
+
+    # no parameters
+    if len(fun_abi["inputs"]) == 0:
+        return
+
+    # generate symbolic ABI calldata
+    calldata = Calldata(args, mk_arrlen(args), dyn_param_size)
+    result = calldata.create(fun_abi)
+
+    # TODO: use Contract abstraction for calldata
+    wstore(cd, 4, result.size() // 8, result)
 
 
 def mk_callvalue() -> Word:
diff --git a/src/halmos/calldata.py b/src/halmos/calldata.py
new file mode 100644
index 00000000..9588ec6a
--- /dev/null
+++ b/src/halmos/calldata.py
@@ -0,0 +1,206 @@
+# SPDX-License-Identifier: AGPL-3.0
+
+import re
+
+from dataclasses import dataclass
+from typing import List, Dict
+from argparse import Namespace
+from functools import reduce
+
+from z3 import *
+
+from .sevm import con, concat
+
+
+@dataclass(frozen=True)
+class Type:
+    var: str
+
+
+@dataclass(frozen=True)
+class BaseType(Type):
+    typ: str
+
+
+@dataclass(frozen=True)
+class FixedArrayType(Type):
+    base: Type
+    size: int
+
+
+@dataclass(frozen=True)
+class DynamicArrayType(Type):
+    base: Type
+
+
+@dataclass(frozen=True)
+class TupleType(Type):
+    items: List[Type]
+
+
+def parse_type(var: str, typ: str, item: Dict) -> Type:
+    """Parse ABI type in JSON format"""
+
+    # parse array type
+    match = re.search(r"^(.*)(\[([0-9]*)\])$", typ)
+    if match:
+        base_type = match.group(1)
+        array_len = match.group(3)
+
+        # recursively parse base type
+        base = parse_type("", base_type, item)
+
+        if array_len == "":  # dynamic array
+            return DynamicArrayType(var, base)
+        else:
+            return FixedArrayType(var, base, int(array_len))
+
+    # check supported type
+    match = re.search(r"^(u?int[0-9]*|address|bool|bytes[0-9]*|string|tuple)$", typ)
+    if not match:
+        # TODO: support fixedMxN, ufixedMxN, function types
+        raise NotImplementedError(f"Not supported type: {typ}")
+
+    # parse tuple type
+    if typ == "tuple":
+        return parse_tuple_type(var, item["components"])
+
+    # parse primitive types
+    return BaseType(var, typ)
+
+
+def parse_tuple_type(var: str, items: List[Dict]) -> Type:
+    parsed_items = [parse_type(item["name"], item["type"], item) for item in items]
+    return TupleType(var, parsed_items)
+
+
+@dataclass(frozen=True)
+class EncodingResult:
+    data: List[BitVecRef]
+    size: int
+    static: bool
+
+
+class Calldata:
+    args: Namespace
+    arrlen: Dict[str, int]
+    dyn_param_size: List[str]  # to be updated
+
+    def __init__(
+        self, args: Namespace, arrlen: Dict[str, int], dyn_param_size: List[str]
+    ) -> None:
+        self.args = args
+        self.arrlen = arrlen
+        self.dyn_param_size = dyn_param_size
+
+    def choose_array_len(self, name: str) -> int:
+        if name in self.arrlen:
+            array_len = self.arrlen[name]
+        else:
+            array_len = self.args.loop
+            if self.args.debug:
+                print(
+                    f"Warning: no size provided for {name}; default value {array_len} will be used."
+                )
+
+        self.dyn_param_size.append(f"|{name}|={array_len}")
+
+        return array_len
+
+    def create(self, abi: Dict) -> BitVecRef:
+        """Create calldata of ABI type"""
+
+        # list of parameter types
+        tuple_type = parse_tuple_type("", abi["inputs"])
+
+        # no parameters
+        if len(tuple_type.items) == 0:
+            return None
+
+        # ABI encoded symbolic calldata for parameters
+        encoded = self.encode("", tuple_type)
+        result = concat(encoded.data)
+
+        # sanity check
+        if result.size() != 8 * encoded.size:
+            raise ValueError(encoded)
+
+        return result
+
+    def encode(self, name: str, typ: Type) -> EncodingResult:
+        """Create symbolic ABI encoded calldata
+
+        See https://docs.soliditylang.org/en/latest/abi-spec.html
+        """
+
+        # (T1, T2, ..., Tn)
+        if isinstance(typ, TupleType):
+            prefix = f"{name}." if name else ""
+            items = [self.encode(f"{prefix}{item.var}", item) for item in typ.items]
+            return self.encode_tuple(items)
+
+        # T[k]
+        if isinstance(typ, FixedArrayType):
+            items = [self.encode(f"{name}[{i}]", typ.base) for i in range(typ.size)]
+            return self.encode_tuple(items)
+
+        # T[]
+        if isinstance(typ, DynamicArrayType):
+            array_len = self.choose_array_len(name)
+            items = [self.encode(f"{name}[{i}]", typ.base) for i in range(array_len)]
+            encoded = self.encode_tuple(items)
+            return EncodingResult(
+                [con(array_len)] + encoded.data, 32 + encoded.size, False
+            )
+
+        if isinstance(typ, BaseType):
+            # bytes, string
+            if typ.typ in ["bytes", "string"]:
+                size = 65  # ECDSA signature size  # TODO: use args
+                size_pad_right = ((size + 31) // 32) * 32
+                data = [
+                    con(size_pad_right),
+                    BitVec(f"p_{name}_{typ.typ}", 8 * size_pad_right),
+                ]
+                return EncodingResult(data, 32 + size_pad_right, False)
+
+            # uintN, intN, address, bool, bytesN
+            else:
+                return EncodingResult([BitVec(f"p_{name}_{typ.typ}", 256)], 32, True)
+
+        raise ValueError(typ)
+
+    def encode_tuple(self, items: List[EncodingResult]) -> EncodingResult:
+        # For X = (X(1), ..., X(k)):
+        #
+        # enc(X) = head(X(1)) ... head(X(k)) tail(X(1)) ... tail(X(k))
+        #
+        # if Ti is static:
+        #   head(X(i)) = enc(X(i))
+        #   tail(X(i)) = "" (the empty string)
+        #
+        # if Ti is dynamic:
+        #   head(X(i)) = enc(len( head(X(1)) ... head(X(k)) tail(X(1)) ... tail(X(i-1)) ))
+        #   tail(X(i)) = enc(X(i))
+        #
+        # See https://docs.soliditylang.org/en/latest/abi-spec.html#formal-specification-of-the-encoding
+
+        # compute total head size
+        head_size = lambda x: x.size if x.static else 32
+        total_head_size = reduce(lambda s, x: s + head_size(x), items, 0)
+
+        # generate heads and tails
+        total_size = total_head_size
+        heads, tails = [], []
+        for item in items:
+            if item.static:
+                heads.extend(item.data)
+            else:
+                heads.append(con(total_size))
+                tails.extend(item.data)
+                total_size += item.size
+
+        # tuple is static if all elements are static
+        static = len(tails) == 0
+
+        return EncodingResult(heads + tails, total_size, static)
diff --git a/tests/expected/all.json b/tests/expected/all.json
index 0986b58f..9836571f 100644
--- a/tests/expected/all.json
+++ b/tests/expected/all.json
@@ -1034,8 +1034,34 @@
         "test/Struct.t.sol:StructTest": [
             {
                 "name": "check_Struct((uint256,uint256))",
-                "exitcode": 2,
-                "num_models": null,
+                "exitcode": 1,
+                "num_models": 1,
+                "num_paths": null,
+                "time": null,
+                "num_bounded_loops": null
+            },
+            {
+                "name": "check_StructArray((uint256,uint256)[],(uint256,uint256)[2])",
+                "exitcode": 1,
+                "num_models": 1,
+                "num_paths": null,
+                "time": null,
+                "num_bounded_loops": null
+            },
+            {
+                "name": "check_StructArrayArray((uint256,uint256)[][],(uint256,uint256)[2][],(uint256,uint256)[][2],(uint256,uint256)[2][2])",
+                "exitcode": 1,
+                "num_models": 1,
+                "num_paths": null,
+                "time": null,
+                "num_bounded_loops": null
+            }
+        ],
+        "test/Struct.t.sol:StructTest2": [
+            {
+                "name": "check_S((uint256,uint256[],uint256),(uint256,(uint256,uint256[],uint256),uint256[],(uint256,uint256[],uint256)[],uint256[1],(uint256,uint256[],uint256)[][])[])",
+                "exitcode": 1,
+                "num_models": 1,
                 "num_paths": null,
                 "time": null,
                 "num_bounded_loops": null
diff --git a/tests/test/Struct.t.sol b/tests/test/Struct.t.sol
index e4c8a3e6..d2cdef42 100644
--- a/tests/test/Struct.t.sol
+++ b/tests/test/Struct.t.sol
@@ -1,14 +1,128 @@
 // SPDX-License-Identifier: AGPL-3.0
 pragma solidity >=0.8.0 <0.9.0;
 
+/// @custom:halmos --solver-timeout-assertion 0
 contract StructTest {
     struct Point {
         uint x;
         uint y;
     }
 
-    // TODO: support struct parameter
-    function check_Struct(Point memory) public pure {
-        assert(true);
+    function check_Struct(Point memory p) public pure returns (uint result) {
+        unchecked {
+            result += p.x + p.y;
+        }
+        assert(result == 0); // expected to fail and generate a counterexample that incorporates all calldata symbols
+    }
+
+    /// @custom:halmos --array-lengths p=1
+    function check_StructArray(Point[] memory p, Point[2] memory q) public pure returns (uint result) {
+        for (uint i = 0; i < p.length; i++) {
+            unchecked {
+                result += p[i].x + p[i].y;
+            }
+        }
+        for (uint i = 0; i < q.length; i++) {
+            unchecked {
+                result += q[i].x + q[i].y;
+            }
+        }
+        assert(result == 0); // expected to fail and generate a counterexample that incorporates all calldata symbols
+    }
+
+    /// @custom:halmos --array-lengths p=1,p[0]=1
+    function check_StructArrayArray(
+        Point[][] memory p,
+        Point[2][] memory q,
+        Point[][2] memory r,
+        Point[2][2] memory s
+    ) public pure returns (uint result) {
+        for (uint i = 0; i < p.length; i++) {
+            for (uint j = 0; j < p[i].length; j++) {
+                unchecked {
+                    result += p[i][j].x + p[i][j].y;
+                }
+            }
+        }
+        for (uint i = 0; i < q.length; i++) {
+            for (uint j = 0; j < q[i].length; j++) {
+                unchecked {
+                    result += q[i][j].x + q[i][j].y;
+                }
+            }
+        }
+        for (uint i = 0; i < r.length; i++) {
+            for (uint j = 0; j < r[i].length; j++) {
+                unchecked {
+                    result += r[i][j].x + r[i][j].y;
+                }
+            }
+        }
+        for (uint i = 0; i < s.length; i++) {
+            for (uint j = 0; j < s[i].length; j++) {
+                unchecked {
+                    result += s[i][j].x + s[i][j].y;
+                }
+            }
+        }
+        assert(result == 0); // expected to fail and generate a counterexample that incorporates all calldata symbols
+    }
+}
+
+/// @custom:halmos --solver-timeout-assertion 0
+contract StructTest2 {
+    struct P {
+        uint x;
+        uint[] y;
+        uint z;
+    }
+
+    struct S {
+        uint f1;
+        P f2;
+        uint[] f3;
+        P[] f4;
+        uint[1] f5;
+        P[][] f6;
+    }
+
+    /// @custom:halmos --array-lengths s=1
+    function check_S(P memory p, S[] memory s) public pure returns (uint result) {
+        unchecked {
+            result += sum_P(p);
+            for (uint i = 0; i < s.length; i++) {
+                result += sum_S(s[i]);
+            }
+        }
+        assert(result == 0); // expected to fail and generate a counterexample that incorporates all calldata symbols
+    }
+
+    function sum_P(P memory p) internal pure returns (uint result) {
+        unchecked {
+            result += p.x;
+            for (uint i = 0; i < p.y.length; i++) {
+                result += p.y[i];
+            }
+            result += p.z;
+        }
+    }
+
+    function sum_S(S memory s) internal pure returns (uint result) {
+        unchecked {
+            result += s.f1;
+            result += sum_P(s.f2);
+            for (uint i = 0; i < s.f3.length; i++) {
+                result += s.f3[i];
+            }
+            for (uint i = 0; i < s.f4.length; i++) {
+                result += sum_P(s.f4[i]);
+            }
+            result += s.f5[0];
+            for (uint i = 0; i < s.f6.length; i++) {
+                for (uint j = 0; j < s.f6[i].length; j++) {
+                    result += sum_P(s.f6[i][j]);
+                }
+            }
+        }
     }
 }

From 1bafa77b8c2b20857e12547616bc43088486bb36 Mon Sep 17 00:00:00 2001
From: Daejun Park <daejunpark@gmail.com>
Date: Tue, 8 Aug 2023 12:11:32 -0700
Subject: [PATCH 2/2] feat: add --no-test-constructor option (#162)

---
 src/halmos/__main__.py | 51 +++++++++++++++++++++++++++++-------------
 src/halmos/parser.py   |  5 +++++
 2 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index ca13f5ee..37ec575b 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -180,14 +180,14 @@ def run_bytecode(hexcode: str, args: Namespace) -> List[Exec]:
 
 
 def deploy_test(
-    hexcode: str,
+    creation_hexcode: str,
+    deployed_hexcode: str,
     sevm: SEVM,
     args: Namespace,
 ) -> Exec:
     # test contract creation bytecode
-    creation_bytecode = Contract.from_hexcode(hexcode)
+    creation_bytecode = Contract.from_hexcode(creation_hexcode)
 
-    solver = mk_solver(args)
     this = mk_this()
 
     ex = sevm.mk_exec(
@@ -201,9 +201,16 @@ def deploy_test(
         this=this,
         pgm=creation_bytecode,
         symbolic=False,
-        solver=solver,
+        solver=mk_solver(args),
     )
 
+    # use the given deployed bytecode if --no-test-constructor is enabled
+    if args.no_test_constructor:
+        deployed_bytecode = Contract.from_hexcode(deployed_hexcode)
+        ex.code[this] = deployed_bytecode
+        ex.pgm = deployed_bytecode
+        return ex
+
     # create test contract
     (exs, _, _) = sevm.run(ex)
 
@@ -232,7 +239,8 @@ def deploy_test(
 
 
 def setup(
-    hexcode: str,
+    creation_hexcode: str,
+    deployed_hexcode: str,
     abi: List,
     setup_info: FunctionInfo,
     args: Namespace,
@@ -241,7 +249,7 @@ def setup(
 
     sevm = SEVM(mk_options(args))
 
-    setup_ex = deploy_test(hexcode, sevm, args)
+    setup_ex = deploy_test(creation_hexcode, deployed_hexcode, sevm, args)
 
     setup_mid = timer()
 
@@ -537,7 +545,8 @@ def run(
 
 @dataclass(frozen=True)
 class SetupAndRunSingleArgs:
-    hexcode: str
+    creation_hexcode: str
+    deployed_hexcode: str
     abi: List
     setup_info: FunctionInfo
     fun_info: FunctionInfo
@@ -549,7 +558,8 @@ def setup_and_run_single(fn_args: SetupAndRunSingleArgs) -> List[TestResult]:
     args = fn_args.args
     try:
         setup_ex = setup(
-            fn_args.hexcode,
+            fn_args.creation_hexcode,
+            fn_args.deployed_hexcode,
             fn_args.abi,
             fn_args.setup_info,
             fn_args.setup_args,
@@ -604,7 +614,8 @@ class RunArgs:
     funsigs: List[str]
 
     # code of the current contract
-    hexcode: str
+    creation_hexcode: str
+    deployed_hexcode: str
 
     abi: List
     methodIdentifiers: Dict[str, str]
@@ -615,8 +626,9 @@ class RunArgs:
 
 def run_parallel(run_args: RunArgs) -> List[TestResult]:
     args = run_args.args
-    hexcode, abi, methodIdentifiers = (
-        run_args.hexcode,
+    creation_hexcode, deployed_hexcode, abi, methodIdentifiers = (
+        run_args.creation_hexcode,
+        run_args.deployed_hexcode,
         run_args.abi,
         run_args.methodIdentifiers,
     )
@@ -629,7 +641,8 @@ def run_parallel(run_args: RunArgs) -> List[TestResult]:
     ]
     single_run_args = [
         SetupAndRunSingleArgs(
-            hexcode,
+            creation_hexcode,
+            deployed_hexcode,
             abi,
             setup_info,
             fun_info,
@@ -654,7 +667,13 @@ def run_sequential(run_args: RunArgs) -> List[TestResult]:
         setup_args = extend_args(
             args, parse_devdoc(setup_info.sig, run_args.contract_json)
         )
-        setup_ex = setup(run_args.hexcode, run_args.abi, setup_info, setup_args)
+        setup_ex = setup(
+            run_args.creation_hexcode,
+            run_args.deployed_hexcode,
+            run_args.abi,
+            setup_info,
+            setup_args,
+        )
     except Exception as err:
         print(
             color_warn(f"Error: {setup_info.sig} failed: {type(err).__name__}: {err}")
@@ -1057,7 +1076,8 @@ def _main(_args=None) -> MainResult:
                 if contract_type != "contract":
                     continue
 
-                hexcode = contract_json["bytecode"]["object"]
+                creation_hexcode = contract_json["bytecode"]["object"]
+                deployed_hexcode = contract_json["deployedBytecode"]["object"]
                 abi = contract_json["abi"]
                 methodIdentifiers = contract_json["methodIdentifiers"]
 
@@ -1081,7 +1101,8 @@ def _main(_args=None) -> MainResult:
 
                     run_args = RunArgs(
                         funsigs,
-                        hexcode,
+                        creation_hexcode,
+                        deployed_hexcode,
                         abi,
                         methodIdentifiers,
                         contract_args,
diff --git a/src/halmos/parser.py b/src/halmos/parser.py
index bdaa1829..f090fa43 100644
--- a/src/halmos/parser.py
+++ b/src/halmos/parser.py
@@ -54,6 +54,11 @@ def mk_arg_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "--symbolic-msg-sender", action="store_true", help="set msg.sender symbolic"
     )
+    parser.add_argument(
+        "--no-test-constructor",
+        action="store_true",
+        help="do not run the constructor of test contracts",
+    )
 
     parser.add_argument(
         "--version", action="store_true", help="print the version number"