Slow lia goal #10743

samuelgruetter · 2019-09-10T03:10:50Z

Here's a goal on which lia (and omega as well) is very slow:

Require Import Coq.ZArith.ZArith.
Require Import Coq.micromega.Lia.
Require Import Coq.omega.Omega.
Open Scope Z_scope.

Goal
  forall (r2 : Z) (L : nat),
  r2 = 8 * Z.of_nat L ->
  r2 <> 0 ->
  0 <= Z.of_nat L ->
  forall q r r1 : Z,
  (2 ^ r1 <> 0 -> r2 = 2 ^ r1 * q + r) ->
  (0 < 2 ^ r1 -> 0 <= r < 2 ^ r1) ->
  (2 ^ r1 < 0 -> 2 ^ r1 < r <= 0) ->
  (2 ^ r1 = 0 -> q = 0) ->
  (2 ^ r1 = 0 -> r = 0) ->
  forall q0 r0 M : Z,
  (M <> 0 -> 3 = M * q0 + r0) ->
  (0 < M -> 0 <= r0 < M) ->
  (M < 0 -> M < r0 <= 0) ->
  (M = 0 -> q0 = 0) ->
  (M = 0 -> r0 = 0) ->
  forall q1 : Z,
  (M <> 0 -> 4 = M * q1 + r1) ->
  (0 < M -> 0 <= r1 < M) ->
  (M < 0 -> M < r1 <= 0) ->
  (M = 0 -> q1 = 0) ->
  (M = 0 -> r1 = 0) ->
  forall q2 y1 y2 : Z,
  (M <> 0 -> y2 - y1 = M * q2 + r2) ->
  (0 < M -> 0 <= r2 < M) ->
  (M < 0 -> M < r2 <= 0) ->
  (M = 0 -> q2 = 0) -> (M = 0 -> r2 = 0) ->
  0 <= y2 - (y1 + q * 2 ^ r0 + 8) < M.
Proof.
  intros.
  Time lia.

Note that lia can't solve this, and that's fine, but I would like lia to fail faster, so that I can write proof scripts which try many things.
In Coq 8.9.0, this takes 80s, and in Coq master, this takes 15s using coqc from the command line, and interestingly, it takes 40s-50s in Proof General.

/cc @andres-erbsen @fajb

The text was updated successfully, but these errors were encountered:

ppedrot · 2019-09-10T07:15:00Z

FTR, I don't see anything suspicious in the profiling on 110e87a. It is likely a genuinely hard problem, all the time is spent in CNF normalization of micromega.

amahboubi · 2019-09-10T08:05:00Z

@ppedrot : the goal features subterms like 2 ^ r0 or M * q2 or even 2 ^ r1 * q. If the corresponding hypotheses are really useful in the proof, then this is clearly not a goal lia or omega can possibly deal with. I guess this is what @samuelgruetter means when he wishes the tactics fail earlier.

thery · 2019-09-10T08:48:08Z

I think it has to do with the exponential CNF phase here is a more contrived example

Goal 
  forall x y,
   (x = 0 -> y = 0) ->
   (x = 1 -> y = 0) ->
   (x = 2 -> y = 0) ->
   (x = 3 -> y = 0) ->
   (x = 4 -> y = 0) ->
   (x = 5 -> y = 0) ->
   (x = 6 -> y = 0) ->
   (x = 7 -> y = 0) ->
   (x = 8 -> y = 0) ->
   (x = 9 -> y = 0) ->
   (x = 0 -> y = 0) ->
   (x = 1 -> y = 0) ->
   (x = 2 -> y = 0) ->
   (x = 3 -> y = 0) ->
   (x = 4 -> y = 0) ->
   (x = 5 -> y = 0) ->
   (x = 6 -> y = 0) ->
   (x = 7 -> y = 0) ->
   (x = 8 -> y = 0) ->
   (x = 9 -> y = 0) ->
   x = 1.
Proof.
  intros.
  Time lia.

ppedrot · 2019-09-10T09:06:07Z

@amahboubi indeed, but it's still hard to get a reasonable heuristic that wouldn't endanger the results of the tactics. It might be the case that you use the exponentials in some opaque way.

@thery I concur, but even if the algorithm is exponential there is room for improvement. With two trivial optimizations below (directly in OCaml but that should be backported to the Coq source), @samuelgruetter's example fails three times faster.

diff --git a/plugins/micromega/micromega.ml b/plugins/micromega/micromega.ml
index cd620bd4a9..63d84544e7 100644
--- a/plugins/micromega/micromega.ml
+++ b/plugins/micromega/micromega.ml
@@ -1075,11 +1075,8 @@ let or_clause_cnf unsat deduce t0 f =
     ('a1 -> bool) -> ('a1 -> 'a1 -> 'a1 option) -> ('a1, 'a2) cnf -> ('a1,
     'a2) cnf -> ('a1, 'a2) cnf **)
 
-let rec or_cnf unsat deduce f f' =
-  match f with
-  | [] -> cnf_tt
-  | e::rst ->
-    app (or_cnf unsat deduce rst f') (or_clause_cnf unsat deduce e f')
+let or_cnf unsat deduce f f' =
+  CList.map_append (fun e -> or_clause_cnf unsat deduce e f') f
 
 (** val and_cnf : ('a1, 'a2) cnf -> ('a1, 'a2) cnf -> ('a1, 'a2) cnf **)
 
@@ -1162,7 +1159,7 @@ let ror_clause_cnf unsat deduce t0 f =
     let acc,tg = pat in
     (match ror_clause unsat deduce t0 e with
      | Inl cl -> (cl::acc),tg
-     | Inr l -> acc,(app tg l))) ([],[]) f
+     | Inr l -> acc,(l :: tg))) ([],[]) f
 
 (** val ror_cnf :
     ('a1 -> bool) -> ('a1 -> 'a1 -> 'a1 option) -> ('a1 * 'a2) list list ->
@@ -1174,7 +1171,7 @@ let rec ror_cnf unsat deduce f f' =
   | e::rst ->
     let rst_f',t0 = ror_cnf unsat deduce rst f' in
     let e_f',t' = ror_clause_cnf unsat deduce e f' in
-    (app rst_f' e_f'),(app t0 t')
+    (app rst_f' e_f'),(app t0 (List.concat @@ List.rev t'))
 
 (** val rxcnf :
     ('a2 -> bool) -> ('a2 -> 'a2 -> 'a2 option) -> ('a1 -> 'a3 -> ('a2, 'a3)
diff --git a/plugins/micromega/micromega.mli b/plugins/micromega/micromega.mli
index 6da0c754f4..bdd72ab19f 100644
--- a/plugins/micromega/micromega.mli
+++ b/plugins/micromega/micromega.mli
@@ -317,7 +317,7 @@ val ror_clause :
 
 val ror_clause_cnf :
   ('a1 -> bool) -> ('a1 -> 'a1 -> 'a1 option) -> ('a1 * 'a2) list -> ('a1, 'a2) clause
-  list -> ('a1, 'a2) clause list * 'a2 list
+  list -> ('a1, 'a2) clause list * 'a2 list list
 
 val ror_cnf :
   ('a1 -> bool) -> ('a1 -> 'a1 -> 'a1 option) -> ('a1 * 'a2) list list -> ('a1, 'a2)

The morale is that we shouldn't append lists one by one...

amahboubi · 2019-09-10T10:12:50Z

On the other hand, it is quite easy in this case to see that the formula is false, e.g. with M = 0 = y1 = y2. I suspect being able to test the property in parallel would provide the fastest failure in a number of cases.

fajb · 2019-09-10T11:55:51Z

It is good to know that the CNF can be a bottleneck.
@ppedrot do not hesitate to submit your optimisation.

Eventually, the solution is to not use a CNF...
What we would need is a decent sat solver/ SMT core to deal with propositional logic.
Maybe, intuition lia could be adapted to explore in a depth-first fashion, thus allowing lia to fail faster.

ppedrot · 2019-09-10T12:14:57Z

@fajb I am working on the Coq backport. Is it still the case that we generate micromega.ml out of the Coq file, or does the OCaml file already contain some handwritten tweaks?

fajb · 2019-09-10T12:40:27Z

Yes, micromega.ml is still extracted.
You need to un-comment and run MExtraction.v

ppedrot · 2019-09-10T12:41:29Z

The mli file seems to be handwritten though, as extracting it on master results in a slightly different file. Should I reextract it, or adapt it by hand?

fajb · 2019-09-10T12:54:53Z

I guess this is to pass the test-suite which indeed extracts in a slightly different way -- and complains about the formatting.
Because I do not know how to fix it, I ususally run the test-suite and copy the expected result back in micromega.ml/mli...

thery · 2019-09-12T05:38:37Z

@samuelgruetter your example seems to contain lots of conditionals with the same condition.
Factorizing out these hyps can drastically help omega/lia.

If you define

Ltac cond_hyps_factor :=
    repeat match goal with
           | [ H : ?x -> _, H' : ?x -> _ |- _ ] => 
                pose proof (fun u : x => conj (H u) (H' u)); clear H H' end.

now on your example

intros; cond_hyps_factor; lia

fails almost immediately.

Of course, this may work only if you have many expressions on the same modulo. I would be very interested to know if this little trick makes lia/omega usable again in your applicattion.

fajb · 2019-09-12T08:09:10Z

@thery very nice.
It may also make sense to do the same for any disjunction...

thery · 2019-09-13T12:20:44Z

apparently the benefit is not so big. I've been the victim of some cache effect of lia.
I thought that caching was only trigger for positive result.....

samuelgruetter · 2019-09-13T12:44:12Z

I experienced some inconsistent timings/surprises too while experimenting with lia timings. Is there a way to turn off lia caching?

fajb · 2019-09-13T14:19:50Z

@samuelgruetter , currently there is no way to turn it off.
This the the matter of yet another option, though.
@thery this is odd that the cache takes so long. I'll have a look.

thery · 2019-09-13T17:07:56Z

@samuelgruetter
when benchmarking it always good to remove the cache, (on my machine it is a file .lia.cache).
With an empty cache, my optimization seems to work. Is it the same on your machine

thery · 2019-09-19T07:43:13Z

@samuelgruetter did you try the optimization on other examples?

samuelgruetter · 2019-09-19T17:51:19Z

@thery I tried your optimization on some more examples here: mit-plv/bedrock2#98
It seems that it's an improvement!

thery · 2019-09-19T19:59:39Z

@samuelgruetter very good! And hopefully with the new release of 8.10 you will get another significant speedup!

- Specialised hash function. Avoid collisions in extreme scenarios. - Flags to disable the use of the caches. fixes coq#10772, fixes coq#10743

samuelgruetter · 2020-06-12T17:15:12Z

Here's another example where I think lia is slow because of CNF conversion:

Require Import Coq.Lists.List. Import ListNotations.
Require Import Coq.ZArith.ZArith.
Require Import Coq.micromega.Lia.

Axiom word: Type.
Axiom stmt: Type.
Axiom stackalloc_size: stmt -> Z.
Axiom bytes_per_word: Z.
Axiom list_union: forall {A: Type}, (A -> A -> bool) -> list A -> list A -> list A.
Axiom modVars_as_list: (Z -> Z -> bool) -> stmt -> list Z.

Module word.
  Axiom of_Z: Z -> word.
End word.

Declare Scope word_scope.
Notation "! n" := (word.of_Z n) (at level 0, n at level 0, format "! n") : word_scope.
Notation "# n" := (Z.of_nat n) (at level 0, n at level 0, format "# n") : word_scope.
Delimit Scope word_scope with word.
Open Scope word_scope.

Open Scope Z_scope.

Goal forall (rem_framewords : Z) (program_base : word) (pos : Z) (pos0 : Z) (body: stmt)
            (unused_scratch old_argvals old_retvals : list word) (old_ra : word)
            (old_modvarvals ToSplit old_scratch : list word)
            (argnames retnames: list Z),
  0 <= rem_framewords ->
  rem_framewords +
  (stackalloc_size body / bytes_per_word +
   #(length argnames + length retnames + 1 +
     length (list_union Z.eqb (modVars_as_list Z.eqb body) argnames))) <=
  #(length
      ((((((ToSplit ++ old_scratch) ++ old_modvarvals) ++ [old_ra]) ++ old_retvals) ++ old_argvals) ++
       unused_scratch)) ->
  bytes_per_word = 4 \/ bytes_per_word = 8 ->
  0 <= stackalloc_size body / bytes_per_word ->
  length
    (((((ToSplit ++ old_scratch) ++ old_modvarvals) ++ [old_ra]) ++ old_retvals) ++ old_argvals) =
  (length
     ((((((ToSplit ++ old_scratch) ++ old_modvarvals) ++ [old_ra]) ++ old_retvals) ++ old_argvals) ++
      unused_scratch) - Z.to_nat rem_framewords)%nat ->
  length unused_scratch = Z.to_nat rem_framewords ->
  length ((((ToSplit ++ old_scratch) ++ old_modvarvals) ++ [old_ra]) ++ old_retvals) =
  (length
     (((((ToSplit ++ old_scratch) ++ old_modvarvals) ++ [old_ra]) ++ old_retvals) ++ old_argvals) -
   length argnames)%nat ->
  length old_argvals = length argnames ->
  length (((ToSplit ++ old_scratch) ++ old_modvarvals) ++ [old_ra]) =
  (length ((((ToSplit ++ old_scratch) ++ old_modvarvals) ++ [old_ra]) ++ old_retvals) -
   length retnames)%nat ->
  length old_retvals = length retnames ->
  length ((ToSplit ++ old_scratch) ++ old_modvarvals) =
  (length (((ToSplit ++ old_scratch) ++ old_modvarvals) ++ [old_ra]) - 1)%nat ->
  length [old_ra] = 1%nat ->
  length (ToSplit ++ old_scratch) =
  (length ((ToSplit ++ old_scratch) ++ old_modvarvals) -
   length (list_union Z.eqb (modVars_as_list Z.eqb body) argnames))%nat ->
  length old_modvarvals = length (list_union Z.eqb (modVars_as_list Z.eqb body) argnames) ->
  length ToSplit =
  (length (ToSplit ++ old_scratch) - Z.to_nat (stackalloc_size body / bytes_per_word))%nat ->
  length old_scratch = Z.to_nat (stackalloc_size body / bytes_per_word) ->
  length old_scratch = Z.to_nat (stackalloc_size body / bytes_per_word) /\
  length old_modvarvals = length (list_union Z.eqb (modVars_as_list Z.eqb body) argnames) /\
  length old_retvals = length retnames /\
  length old_argvals = length argnames /\
  length unused_scratch = Z.to_nat rem_framewords.
Proof.
  intros.
  (* zify. (* creates many disjunctions of the form "0 < z /\ z0 = z \/ z <= 0 /\ z0 = 0" *) *)
  (* Time repeat split; assumption. (* 0 secs *) *)
  Time lia. (* 19 secs *)
Qed.

Coq version: 8.11.0

Contrary to the original example in this issue, this goal came up without using Z.div_mod_to_equations (which generates many implications and disjunctions), but this time, I assume it's because of the disjunctions that zify creates for nat subtractions.

I didn't make a new issue for this because it seems to be the same underlying problem, and based on @fajb's comment here, I'm not expecting any improvement anytime soon, just wanted to record this test case here.

On the bright side: I'm adding a feature to a compiler whose correctness proof I wrote in Coq 8.9, and now I'm working in 8.11, and there are several goals where in 8.9, I had to manually clear all hypotheses except a few useful ones to make lia fast enough, and now I can often just keep all hypotheses without worrying about which ones are used, and lia still is fast enough, so even if Coq doesn't provide the kind of arithmetic solver I'd like yet, some noticeable improvement has happened 😃

thery · 2020-06-12T17:23:27Z

even if Coq doesn't provide the kind of arithmetic solver I'd like yet, some noticeable improvement has happened

👍

samuelgruetter · 2021-05-17T04:25:38Z

To add to the collection of test cases, here's a case where lia takes several minutes to fail:

Require Import ZArith Lia. Open Scope Z_scope.

Goal forall i w0 wi bpw : Z,
  0 <= w0 < 2 ^ wi ->
  (w0 + (bpw mod 2 ^ wi * Z.of_nat (Z.to_nat i)) mod 2 ^ wi) mod 2 ^ wi =
  (w0 + (i * bpw) mod 2 ^ wi) mod 2 ^ wi.
Proof.
  intros. Z.div_mod_to_equations.
  Fail lia.

Using @fajb's itauto, itauto lia fails immediately.

Coq version: master (77d56f3)

fajb · 2021-05-17T08:50:51Z

It might make sense to add @thery 's trick to Z.div_mod_to_equations.

ppedrot mentioned this issue Sep 10, 2019

Optimize list constructions in micromega #10745

Closed

pi8027 added the part: micromega The lia, nia, lra, nra and psatz tactics. Also the legacy omega tactic. label Sep 18, 2019

fajb mentioned this issue Sep 19, 2019

Improved handling of micromega caches #10765

Merged

samuelgruetter mentioned this issue Sep 19, 2019

[do not merge] profiling for lia in bsearch.v mit-plv/bedrock2#98

Draft

samuelgruetter mentioned this issue Sep 19, 2019

Option to disable lia caching #10772

Closed

fajb added a commit to fajb/coq that referenced this issue Sep 23, 2019

Improved handling of micromega caches

9973a8f

- Specialised hash function. Avoid collisions in extreme scenarios. - Flags to disable the use of the caches. fixes coq#10772, fixes coq#10743

pi8027 added the kind: performance Improvements to performance and efficiency. label Sep 23, 2019

fajb added a commit to fajb/coq that referenced this issue Sep 24, 2019

Improved handling of micromega caches

1e473d1

- Specialised hash function. Avoid collisions in extreme scenarios. - Flags to disable the use of the caches. fixes coq#10772, fixes coq#10743

samuelgruetter mentioned this issue Dec 5, 2019

lia performance regression #11242

Closed

samuelgruetter mentioned this issue Apr 21, 2020

lia runs out of memory when too many implications #12140

Closed

samuelgruetter mentioned this issue Oct 19, 2020

lia is slow on simple goal without hypotheses containing implications or disjunctions #13227

Closed

ppedrot mentioned this issue Feb 28, 2022

lia takes >1min and >4GB to fail on non-linear goal #15748

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Slow lia goal #10743

Slow lia goal #10743

samuelgruetter commented Sep 10, 2019

ppedrot commented Sep 10, 2019

amahboubi commented Sep 10, 2019 •

edited

Loading

thery commented Sep 10, 2019

ppedrot commented Sep 10, 2019

amahboubi commented Sep 10, 2019

fajb commented Sep 10, 2019

ppedrot commented Sep 10, 2019

fajb commented Sep 10, 2019

ppedrot commented Sep 10, 2019

fajb commented Sep 10, 2019

thery commented Sep 12, 2019 •

edited

Loading

fajb commented Sep 12, 2019

thery commented Sep 13, 2019

samuelgruetter commented Sep 13, 2019

fajb commented Sep 13, 2019

thery commented Sep 13, 2019

thery commented Sep 19, 2019

samuelgruetter commented Sep 19, 2019

thery commented Sep 19, 2019

samuelgruetter commented Jun 12, 2020

thery commented Jun 12, 2020

samuelgruetter commented May 17, 2021

fajb commented May 17, 2021

Slow lia goal #10743

Slow lia goal #10743

Comments

samuelgruetter commented Sep 10, 2019

ppedrot commented Sep 10, 2019

amahboubi commented Sep 10, 2019 • edited Loading

thery commented Sep 10, 2019

ppedrot commented Sep 10, 2019

amahboubi commented Sep 10, 2019

fajb commented Sep 10, 2019

ppedrot commented Sep 10, 2019

fajb commented Sep 10, 2019

ppedrot commented Sep 10, 2019

fajb commented Sep 10, 2019

thery commented Sep 12, 2019 • edited Loading

fajb commented Sep 12, 2019

thery commented Sep 13, 2019

samuelgruetter commented Sep 13, 2019

fajb commented Sep 13, 2019

thery commented Sep 13, 2019

thery commented Sep 19, 2019

samuelgruetter commented Sep 19, 2019

thery commented Sep 19, 2019

samuelgruetter commented Jun 12, 2020

thery commented Jun 12, 2020

samuelgruetter commented May 17, 2021

fajb commented May 17, 2021

amahboubi commented Sep 10, 2019 •

edited

Loading

thery commented Sep 12, 2019 •

edited

Loading