Skip to content

Commit

Permalink
HolSmt: add unit tests meant to test individual functions
Browse files Browse the repository at this point in the history
These are run as part of the self-tests.

Currently only unit tests for `Z3_ProofReplay.remove_definitions`
have been added.

Two of them are meant to test tricky cases which would cause the
previous implemention of `remove_definitions` to fail.
  • Loading branch information
someplaceguy committed Feb 15, 2024
1 parent 67a8163 commit e72e7e5
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 9 deletions.
3 changes: 2 additions & 1 deletion src/HolSmt/README
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ HolSmtLib.{sig,sml} Entry point into HolSmtLib. Provides GENERIC_SMT_TAC
HolSmtScript.sml Various theorems for HolSmtLib.
Library.sml Common auxiliary functions, tracing.
README This file.
selftest.sml Unit tests for HolSmtLib.
selftest.sml HolSmtLib tests.
SmtLib_Logics.sml SMT-LIB 2 logics.
SmtLib_Parser.sml Parsing of SMT-LIB 2 benchmarks.
SmtLib_Theories.sml SMT-LIB 2 theories.
SmtLib.sml Translation of HOL terms into SMT-LIB 2 format.
SolverSpec.sml Definition of SMT solvers.
Unittest.{sig,sml} Unit tests for HolSmtLib.
Yices.sml Functions to invoke the Yices SMT solver.
Z3.sml Functions to invoke the Z3 SMT solver.
Z3_ProformaThms.sml Proforma theorems, used for Z3 proof reconstruction.
Expand Down
9 changes: 9 additions & 0 deletions src/HolSmt/Unittest.sig
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
signature Unittest =
sig

val assert : bool * string -> unit
val die : string -> 'a

val run_unittests : unit -> unit

end
225 changes: 225 additions & 0 deletions src/HolSmt/Unittest.sml
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
(* Unit tests for HolSmtLib *)

structure Unittest :> Unittest =
struct

open HolKernel Parse boolLib bossLib

(*****************************************************************************)
(* helper functions *)
(*****************************************************************************)

fun die s =
if !Globals.interactive then
raise (Fail s)
else (
print ("\n" ^ s ^ "\n");
OS.Process.exit OS.Process.failure
)

fun assert (x, msg) = if x then () else die ("FAIL: " ^ msg)

(* Make sure two theorems are identical *)
fun compare_thms thm_pair =
let
val hyps_pair = Lib.pair_map Thm.hypset thm_pair
val (concl1, concl2) = Lib.pair_map Thm.concl thm_pair
val () = assert (HOLset.equal hyps_pair,
"theorem hypotheses are different")
val () = assert (Term.term_eq concl1 concl2,
"theorem conclusions are different")
in
()
end

(*****************************************************************************)
(* test definitions *)
(*****************************************************************************)

(* Test: `Z3_ProofReplay.remove_definitions` works without any definitions *)
fun remove_defs_no_defs () = []

(* Test: `Z3_ProofReplay.remove_definitions` works with a duplicate definition *)
fun remove_defs_dup () = [
``(z1:num) = x + 1``,
``(z2:num) = z1 + 2``,
``(z2:num) = (x + 1) + 2``
]

(* Test: `Z3_ProofReplay.remove_definitions` works with the following set of
(tricky) definitions, which can cause an exponential term blow-up in a
naive implementation of `remove_definitions`:
a1 = 1
b1 = 1
a2 = a1 + a1
b2 = b1 + b1
...
x = an
x = bn
Credit: Tjark Weber for coming up with this scenario. *)
fun remove_defs_tricky1 () =
let
val asl = [
``(a1:num) = 1``,
``(b1:num) = 1``,
``(x:num) = a128``,
``(x:num) = b128``
]
(* `gen_def` creates a definition of the form ``si = s(i-1) + s(i-1)`` *)
fun gen_def (i, s) =
let
val si = Term.mk_var (s ^ Int.toString i, ``:num``)
val si_1 = Term.mk_var (s ^ Int.toString (i - 1), ``:num``)
in
boolSyntax.mk_eq (si, numSyntax.mk_plus (si_1, si_1))
end

(* Add ``ai = a(i-1) + a(i-1)`` and the same for ``bi``, for all 1 < i <= n *)
fun add_defs (n, l) =
if n = 1 then
l
else
let
val an_def = gen_def (n, "a")
val bn_def = gen_def (n, "b")
in
add_defs (n - 1, an_def :: bn_def :: l)
end
in
add_defs (128, asl)
end

(* Test: `Z3_ProofReplay.remove_definitions` works with the following set of
(tricky) definitions, which can cause an exponential term blow-up in a
naive implementation of `remove_definitions`:
x = 1000
y = 2000
z1 = x + 1
z2 = y + 1
z3 = z2 + (y + 1) + z1
z3 = (y + 1) + z2 + (x + 1)
z4 = z3 + (z2 + z2 + z1) + z1
z4 = (z2 + z2 + (x + 1)) + z3 + (x + 1)
(...)
z64 = z63 + (z62 + z62 + z1) + z1
z64 = (z62 + z62 + (x + 1)) + z63 + (x + 1)
... except we'll go all the way up to z128 instead of z64 *)
fun remove_defs_tricky2 () =
let
val asl = [
``(x:num) = 1000``,
``(y:num) = 2000``,
``(z1:num) = x + 1``,
``(z2:num) = y + 1``,
``(z3:num) = z2 + (y + 1) + z1``,
``(z3:num) = (y + 1) + z2 + (x + 1)``
]
fun add3 (a, b, c) = numSyntax.mk_plus (numSyntax.mk_plus (a, b), c)

(* `gen_def1` creates a definition of the form:
``zi = z(i-1) + (z(i-2) + z(i-2) + z1) + z1`` *)
fun gen_def1 i =
let
val z1 = Term.mk_var ("z1", ``:num``)
val zi = Term.mk_var ("z" ^ Int.toString i, ``:num``)
val zi_1 = Term.mk_var ("z" ^ Int.toString (i - 1), ``:num``)
val zi_2 = Term.mk_var ("z" ^ Int.toString (i - 2), ``:num``)
val middle_addend = add3 (zi_2, zi_2, z1)
val sum = add3 (zi_1, middle_addend, z1)
in
boolSyntax.mk_eq (zi, sum)
end

(* `gen_def2` creates a definition of the form:
``zi = (z(i-2) + z(i-2) + (x + 1)) + z(i-1) + (x + 1) *)
fun gen_def2 i =
let
val xp1 = ``(x:num) + 1``
val zi = Term.mk_var ("z" ^ Int.toString i, ``:num``)
val zi_1 = Term.mk_var ("z" ^ Int.toString (i - 1), ``:num``)
val zi_2 = Term.mk_var ("z" ^ Int.toString (i - 2), ``:num``)
val first_addend = add3 (zi_2, zi_2, xp1)
val sum = add3 (first_addend, zi_1, xp1)
in
boolSyntax.mk_eq (zi, sum)
end

(* Add the definitions `gen_def1 i` and `gen_def2 i`, for all 3 < i <= n *)
fun add_defs (n, l) =
if n = 3 then
l
else
let
val def1 = gen_def1 n
val def2 = gen_def2 n
in
add_defs (n - 1, def1 :: def2 :: l)
end
in
add_defs (128, asl)
end

(* Wrapper for `Z3_ProofReplay.remove_definitions` unit tests *)
fun remove_defs_test get_definitions_fn =
let
(* Create a simple theorem *)
val thm = Thm.REFL ``i:num``
(* Add a few hypotheses that should not be removed, and which coincidentally
look like definitions *)
val thm = Drule.ADD_ASSUM ``(i:num) = 7`` thm
val thm = Drule.ADD_ASSUM ``(j:num) = i + 3`` thm
(* Add definitions (which should be removed) *)
val asl = get_definitions_fn ()
val defs = List.foldl (Lib.flip HOLset.add) Term.empty_tmset asl
val thm_with_defs = List.foldl (Lib.uncurry Drule.ADD_ASSUM) thm asl
(* Remove definitions *)
val final_thm = Z3_ProofReplay.remove_definitions (defs, thm_with_defs)
in
(* Make sure the resulting theorem is equal to the one before definitions
were added *)
compare_thms (thm, final_thm)
end

(*****************************************************************************)
(* actually perform tests *)
(*****************************************************************************)

fun run_test (name, f) =
let
val () = print ("test " ^ name ^ "...")
val () = Profile.reset_all ()
val () = Profile.profile name f ()
val results = Profile.results ()
val result = Lib.assoc name results
val elapsed = #real result
val elapsed_str = Time.fmt 2 elapsed
val () = print (" OK (elapsed: " ^ elapsed_str ^ "s)\n")
val () = Profile.reset_all ()
in
()
end

fun run_unittests () =
let
val () = print "Running unit tests...\n\n"
val tests = [
("remove_definitions_no_defs", fn () => remove_defs_test remove_defs_no_defs),
("remove_definitions_dup", fn () => remove_defs_test remove_defs_dup),
("remove_definitions_tricky1", fn () => remove_defs_test remove_defs_tricky1),
("remove_definitions_tricky2", fn () => remove_defs_test remove_defs_tricky2)
]
val () = List.app run_test tests
val () = print "\ndone, all unit tests successful.\n"
in
()
end

end (* struct *)
3 changes: 3 additions & 0 deletions src/HolSmt/Z3_ProofReplay.sml
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,9 @@ local
end
in

(* For unit tests *)
val remove_definitions = remove_definitions

(* returns a theorem that concludes ``F``, with its hypotheses (a
subset of) those asserted in the proof *)
fun check_proof proof : Thm.thm =
Expand Down
13 changes: 5 additions & 8 deletions src/HolSmt/selftest.sml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
(* Copyright (c) 2009-2012 Tjark Weber. All rights reserved. *)

(* Unit tests for HolSmtLib *)
(* HolSmtLib tests *)
open HolKernel Parse boolLib bossLib;

val _ = print "Testing HolSmtLib\n"
Expand Down Expand Up @@ -40,13 +40,7 @@ val _ = if Z3.is_configured () then () else

local

fun die s =
if !Globals.interactive then
raise (Fail s)
else (
print ("\n" ^ s ^ "\n");
OS.Process.exit OS.Process.failure
)
val die = Unittest.die

fun term_with_types t = Lib.with_flag(show_types, true) Hol_pp.term_to_string t

Expand Down Expand Up @@ -1060,6 +1054,9 @@ end
(* actually perform tests *)
(*****************************************************************************)

val () = Unittest.run_unittests ()

val () = print "Running functional tests...\n"
val _ = map (fn (term, test_funs) =>
map (fn test_fun => test_fun term) test_funs) tests

Expand Down

0 comments on commit e72e7e5

Please sign in to comment.