From f5acded52971d0ae3e1672af44e7332494507204 Mon Sep 17 00:00:00 2001
From: Daejun Park <daejunpark@gmail.com>
Date: Tue, 13 Aug 2024 18:47:17 -0700
Subject: [PATCH] fix: smt encoding for evm div-by-zero (#271)

---
 src/halmos/__main__.py            | 20 +++++++++++++-------
 src/halmos/sevm.py                | 12 ++++++------
 tests/expected/all.json           | 18 ++++++++++++++++++
 tests/regression/test/Arith.t.sol | 25 +++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index ee3c0bd5..2277b17b 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -1205,17 +1205,23 @@ def is_unknown(result: CheckSatResult, model: Model) -> bool:
 
 def refine(query: SMTQuery) -> SMTQuery:
     smtlib = query.smtlib
+
     # replace uninterpreted abstraction with actual symbols for assertion solving
-    # TODO: replace `(f_evm_bvudiv x y)` with `(ite (= y (_ bv0 256)) (_ bv0 256) (bvudiv x y))`
-    #       as bvudiv is undefined when y = 0; also similarly for f_evm_bvurem
-    smtlib = re.sub(r"(\(\s*)f_evm_(bv[a-z]+)(_[0-9]+)?\b", r"\1\2", smtlib)
-    # remove the uninterpreted function symbols
-    # TODO: this will be no longer needed once is_model_valid is properly implemented
     smtlib = re.sub(
-        r"\(\s*declare-fun\s+f_evm_(bv[a-z]+)(_[0-9]+)?\b",
-        r"(declare-fun dummy_\1\2",
+        r"\(declare-fun f_evm_(bvmul)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)",
+        r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (\1 x y))",
+        smtlib,
+    )
+
+    # replace `(f_evm_bvudiv_N x y)` with `(ite (= y (_ bv0 N)) (_ bv0 N) (bvudiv x y))`
+    # similarly for bvurem, bvsdiv, and bvsrem
+    # NOTE: (bvudiv x (_ bv0 N)) is *defined* to (bvneg (_ bv1 N)); while (div x 0) is undefined
+    smtlib = re.sub(
+        r"\(declare-fun f_evm_(bvudiv|bvurem|bvsdiv|bvsrem)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)",
+        r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (ite (= y (_ bv0 \2)) (_ bv0 \2) (\1 x y)))",
         smtlib,
     )
+
     return SMTQuery(smtlib, query.assertions)
 
 
diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py
index a45ed3b1..67a65ea5 100644
--- a/src/halmos/sevm.py
+++ b/src/halmos/sevm.py
@@ -61,19 +61,19 @@
 f_gasprice = Function("f_gasprice", BitVecSort256)
 
 # uninterpreted arithmetic
-f_div = Function("f_evm_bvudiv", BitVecSort256, BitVecSort256, BitVecSort256)
+f_div = Function("f_evm_bvudiv_256", BitVecSort256, BitVecSort256, BitVecSort256)
 f_mod = {
-    256: Function("f_evm_bvurem", BitVecSort256, BitVecSort256, BitVecSort256),
+    256: Function("f_evm_bvurem_256", BitVecSort256, BitVecSort256, BitVecSort256),
     264: Function("f_evm_bvurem_264", BitVecSort264, BitVecSort264, BitVecSort264),
     512: Function("f_evm_bvurem_512", BitVecSort512, BitVecSort512, BitVecSort512),
 }
 f_mul = {
-    256: Function("f_evm_bvmul", BitVecSort256, BitVecSort256, BitVecSort256),
+    256: Function("f_evm_bvmul_256", BitVecSort256, BitVecSort256, BitVecSort256),
     512: Function("f_evm_bvmul_512", BitVecSort512, BitVecSort512, BitVecSort512),
 }
-f_sdiv = Function("f_evm_bvsdiv", BitVecSort256, BitVecSort256, BitVecSort256)
-f_smod = Function("f_evm_bvsrem", BitVecSort256, BitVecSort256, BitVecSort256)
-f_exp = Function("f_evm_exp", BitVecSort256, BitVecSort256, BitVecSort256)
+f_sdiv = Function("f_evm_bvsdiv_256", BitVecSort256, BitVecSort256, BitVecSort256)
+f_smod = Function("f_evm_bvsrem_256", BitVecSort256, BitVecSort256, BitVecSort256)
+f_exp = Function("f_evm_exp_256", BitVecSort256, BitVecSort256, BitVecSort256)
 
 magic_address: int = 0xAAAA0000
 
diff --git a/tests/expected/all.json b/tests/expected/all.json
index 0774ace3..51a8baa5 100644
--- a/tests/expected/all.json
+++ b/tests/expected/all.json
@@ -22,6 +22,24 @@
             }
         ],
         "test/Arith.t.sol:ArithTest": [
+            {
+                "name": "check_Div_fail(uint256,uint256)",
+                "exitcode": 1,
+                "num_models": 1,
+                "models": null,
+                "num_paths": null,
+                "time": null,
+                "num_bounded_loops": null
+            },
+            {
+                "name": "check_Div_pass(uint256,uint256)",
+                "exitcode": 0,
+                "num_models": 0,
+                "models": null,
+                "num_paths": null,
+                "time": null,
+                "num_bounded_loops": null
+            },
             {
                 "name": "check_Exp(uint256)",
                 "exitcode": 0,
diff --git a/tests/regression/test/Arith.t.sol b/tests/regression/test/Arith.t.sol
index eb5b4225..331cf0ec 100644
--- a/tests/regression/test/Arith.t.sol
+++ b/tests/regression/test/Arith.t.sol
@@ -3,6 +3,12 @@ pragma solidity >=0.8.0 <0.9.0;
 
 contract ArithTest {
 
+    function unchecked_div(uint x, uint y) public pure returns (uint ret) {
+        assembly {
+            ret := div(x, y)
+        }
+    }
+
     function unchecked_mod(uint x, uint y) public pure returns (uint ret) {
         assembly {
             ret := mod(x, y)
@@ -34,4 +40,23 @@ contract ArithTest {
         //  assert(x ** 8 == (x ** 4) ** 2);
         }
     }
+
+    function check_Div_fail(uint x, uint y) public pure {
+        require(x > y);
+
+        uint q = unchecked_div(x, y);
+
+        // note: since x > y, q can be zero only when y == 0, due to the division-by-zero semantics in the EVM
+
+        assert(q != 0); // counterexample: y == 0
+    }
+
+    function check_Div_pass(uint x, uint y) public pure {
+        require(x > y);
+        require(y > 0);
+
+        uint q = unchecked_div(x, y);
+
+        assert(q != 0); // pass
+    }
 }