diff --git a/Engine/Meta/MetaAnchored.v b/Engine/Meta/MetaAnchored.v index 65b84d6..561135d 100644 --- a/Engine/Meta/MetaAnchored.v +++ b/Engine/Meta/MetaAnchored.v @@ -106,7 +106,7 @@ Proof. injection READ as <-. eapply res_indep; eauto. (* tree_disj*) - - boolprop; erewrite IHHtree1; eauto. + - boolprop; erewrite IHHtree1; eauto. (* tree_sequence *) - simpl in IHHtree. boolprop; eauto. (* tree_quant_forced *) @@ -114,8 +114,8 @@ Proof. (* tree_quant_free *) - boolprop. destruct greedy; simpl; erewrite IHHtree2; eauto. - + erewrite res_indep; eauto. - + erewrite res_indep; eauto. + + erewrite res_indep; eauto. + + erewrite res_indep; eauto. (* tree_lk *) - boolprop. + destruct lk; try discriminate. diff --git a/Engine/Meta/MetaLiterals.v b/Engine/Meta/MetaLiterals.v index 5a764bd..ddcf7da 100644 --- a/Engine/Meta/MetaLiterals.v +++ b/Engine/Meta/MetaLiterals.v @@ -63,9 +63,7 @@ Definition try_lit_search {strs:StrSearch} (r:regex) (inp:input) : search_result (* if it has groups we must reconstruct them *) (* LATER: do group reconstruction with an anchored engine *) if has_groups r then Unsupported - (* LATER: return exact result *) - (* else Some (Some (advance_input_n inp' (length s) forward, Groups.GroupMap.empty)) *) - else Unsupported + else Ok (Some (advance_input_n inp' (length s) forward, Groups.GroupMap.empty)) | None => Ok None end end. @@ -127,7 +125,11 @@ Proof. - destruct has_asserts eqn:Hasserts; [discriminate|]. destruct input_search eqn:Hsearch. (* we found a match *) - + destruct has_groups eqn:Hgroups; discriminate. + + destruct has_groups eqn:Hgroups; [discriminate|]. + injection Htry as <-. + eapply exact_literal_result_unanchored in Hsearch as [gm' Hleaf]; eauto. + eapply no_groups_empty_gm in Htree; simpl; boolprop; eauto. simpl in Htree. + now subst. (* we did not find a match *) + injection Htry as <-. rewrite input_search_none_str_search in Hsearch. @@ -179,8 +181,7 @@ Proof. unfold first_leaf. subst. simpl. unfold advance_input'. simpl. assert (Hnoskip: tree_res tskip Groups.GroupMap.empty (Input (c :: next) pref) forward = None). { eapply extract_literal_prefix_contra, input_search_no_earlier; eauto. - rewrite input_prefix_strict_suffix in Hprefix, Hlow. - split; destruct Hprefix, Hlow; subst; eauto using ss_next', ss_advance. + split; ss_solve. } rewrite Hnoskip. simpl. inversion Htree'. inversion TREECONT. @@ -201,7 +202,7 @@ Proof. - erewrite un_exec_correct; eauto. assert (input_prefix inp i forward). { apply input_search_strict_suffix in Hsearch. - now rewrite <-input_prefix_strict_suffix in Hsearch. + ss_solve. } eapply un_exec_all_between_str_search_eq; eauto using ip_eq. (* there is no occurrence of the literal *) diff --git a/Engine/Prefix.v b/Engine/Prefix.v index 223fa1c..b4dd6d7 100644 --- a/Engine/Prefix.v +++ b/Engine/Prefix.v @@ -9,7 +9,7 @@ From Stdlib Require Import List Lia RelationClasses FunInd Recdef Arith. Import ListNotations. -From Linden Require Import Regex Chars Semantics Tree FunctionalSemantics. +From Linden Require Import Regex Chars Semantics Tree FunctionalSemantics FunctionalUtils. From Linden Require Import Parameters LWParameters. From Linden Require Import StrictSuffix. From Linden Require Import Tactics. @@ -327,6 +327,46 @@ Proof. now apply Hnf. Qed. +(* given an input search from inp1 to inp3 where they are different *) +(* the search on the input right after inp1 also returns inp3 *) +Lemma input_search_advance {strs: StrSearch}: + forall inp1 inp2 inp3 p, + input_search p inp1 = Some inp3 -> + strict_suffix inp3 inp1 forward -> + advance_input inp1 forward = Some inp2 -> + input_search p inp2 = Some inp3. +Proof. + unfold input_search. + intros inp1 inp2 inp3 p Hsearch Hss Hadv. + destruct (str_search p (next_str inp1)) as [n|] eqn:Hss1; [injection Hsearch as Heq|discriminate]. + destruct n as [|n]; [rewrite advance_input_n_0 in Heq; ss_solve|]. + destruct inp1 as [[|c next1] pref1]; [discriminate Hadv|]. + injection Hadv as <-. + simpl. + rewrite (str_search_succ_next _ _ _ _ Hss1). + f_equal. subst. symmetry. apply advance_input_n_succ_forward. +Qed. + +(* given an input search from inp1 to inp3, all searches on inputs *) +(* between inp1 and inp3 also return inp3 *) +Lemma input_search_between {strs: StrSearch}: + forall inp1 inp2 inp3 p, + input_search p inp1 = Some inp3 -> + inp2 = inp1 \/ strict_suffix inp2 inp1 forward -> + inp3 = inp2 \/ strict_suffix inp3 inp2 forward -> + input_search p inp2 = Some inp3. +Proof. + intros inp1 inp2 inp3 p Hsearch [<- | Hlow] Hhigh; [assumption|]. + remember forward as dir. + induction Hlow; subst. + + eapply input_search_advance; eauto. + destruct Hhigh; ss_solve. + + apply input_search_advance with (inp1 := inp2); eauto. + * apply IHHlow; eauto. + destruct Hhigh; ss_solve. + * destruct Hhigh; ss_solve. +Qed. + (* input_search finds no result iff str_search finds no result *) Lemma input_search_none_str_search {strs: StrSearch}: forall s inp, @@ -410,6 +450,16 @@ Proof. + destruct l1; easy. Qed. +(* if chaining of literals is exact, chaining is the concatenation of exacts *) +Lemma chain_literals_exact: + forall l1 l2 p, + chain_literals l1 l2 = Exact p -> + exists p1 p2, l1 = Exact p1 /\ l2 = Exact p2 /\ p = p1 ++ p2. +Proof. + destruct l1, l2; try discriminate. + intros p [=]; eauto. +Qed. + (* the longest string that is a prefix of both strings *) Fixpoint common_prefix (s1 s2 : string) : string := match s1, s2 with @@ -476,6 +526,21 @@ Proof. - destruct H; eqdec; subst; easy. Qed. +(* if merging is exact, there are three possible cases *) +Lemma merge_literals_exact: + forall l1 l2 p, + merge_literals l1 l2 = Exact p -> + l1 = Exact p /\ l2 = Exact p \/ + l1 = Impossible /\ l2 = Exact p \/ + l1 = Exact p /\ l2 = Impossible. +Proof. + destruct l1, l2; simpl; try discriminate; eauto. + - case_if; eqdec. + + injection Heq as <-. intros p [=<-]; eauto. + + easy. + - now case_if. +Qed. + (* extracting literals from a character description *) Fixpoint extract_literal_char (cd: char_descr) : literal := match cd with @@ -561,6 +626,21 @@ Proof. symmetry. apply EqDec.inversion_true. assumption. Qed. +(* a character is in range of itself *) +Lemma char_match_range_refl: forall c, + char_match rer c (CdRange c c) = true. +Proof. + unfold char_match, char_match'. intros c. + rewrite + Character.numeric_pseudo_bij, + CharSet.exist_canonicalized_equiv, + CharSet.exist_spec. + unfold CharSet.Exists. + exists c. split. + - rewrite CharSet.range_spec. now constructor. + - now eqdec. +Qed. + Lemma extract_actions_literal_regex: forall r, extract_actions_literal [Areg r] = extract_literal r. Proof. @@ -899,6 +979,199 @@ Proof. Qed. +(** * Exact literals matching *) + + +(* whether a regex has assertions that do not contribute to the match range *) +Fixpoint has_asserts (r:regex) : bool := + match r with + | Lookaround _ _ | Anchor _ => true + | Sequence r1 r2 | Disjunction r1 r2 => has_asserts r1 || has_asserts r2 + | Group _ r' | Quantified _ _ _ r' => has_asserts r' + | Regex.Character _ | Epsilon | Backreference _ => false + end. + +(* generalization of checking for assertions in actions *) +Fixpoint has_asserts_actions (acts: list action) : bool := + match acts with + | [] => false + | Areg r :: rest => has_asserts r || has_asserts_actions rest + | Acheck _ :: rest => true + | Aclose _ :: rest => has_asserts_actions rest + end. + +(* if a literal of a character descriptor is exact, it is a singleton *) +Lemma extract_literal_char_exact_single: + forall cd s, + extract_literal_char cd = Exact s -> + exists c, s = [c]. +Proof. + induction cd; simpl; try discriminate; intros s H. + - injection H as <-. eauto. + - case_if; eqdec. + + injection H as <-. eauto. + + discriminate. + - apply merge_literals_exact in H. boolprop; eauto. +Qed. + +(* if a character descriptor is exact, it matches the extracted character *) +Lemma extract_literal_char_exact_char_match: + forall cd c, + extract_literal_char cd = Exact [c] -> + char_match rer c cd = true. +Proof. + unfold char_match. + induction cd; try discriminate; simpl; intros c' H. + - injection H as <-. now eqdec. + - case_if; eqdec. + + injection H as <-. apply char_match_range_refl. + + discriminate. + - apply merge_literals_exact in H. boolprop; eauto. +Qed. + +(* if a list of actions has no assertions, the extracted literal is exact, and the input + starts with that extracted literal, then the result of the tree is that literal *) +Lemma exact_literal_result_general : + forall acts tree inp gm p, + is_tree rer acts inp gm forward tree -> + has_asserts_actions acts = false -> + extract_actions_literal acts = Exact p -> + starts_with p (next_str inp) -> + (exists gm', tree_res tree gm inp forward = Some (advance_input_n inp (length p) forward, gm')). +Proof. + intros acts tree inp gm p Htree. + generalize dependent p. + remember forward as dir. + induction Htree; intros p Hnoassert Hlit Hsw; subst; simpl in *; + (* constructs that do not return an Exact *) + try discriminate; + (* remove case where ignoreCase is true *) + try (destruct RegExpRecord.ignoreCase; [now destruct extract_actions_literal|]); + (* follows from IH *) + try solve[destruct extract_actions_literal; easy || eapply IHHtree; boolprop; eauto]. + (* tree_match *) + - injection Hlit as <-. rewrite advance_input_n_0. eauto. + (* tree_char *) + - unfold read_char in READ; destruct inp, next as [|c' next']; [discriminate|]. + destruct char_match eqn:Hmatch; [|discriminate]. injection READ as <-. subst. + destruct extract_literal_char eqn:Hchar, extract_actions_literal; try easy. + apply extract_literal_char_exact_single in Hchar as [c'' Hchar]. subst. + injection Hlit as <-. + inversion Hsw. subst. + replace (length (c' :: s0)) with (S (length s0)) by easy. + rewrite advance_input_n_succ_forward. + eapply IHHtree; eauto. + (* tree_char_fail *) + - destruct extract_literal_char eqn:Hchar, extract_actions_literal; try easy. + pose proof (extract_literal_char_exact_single _ _ Hchar) as [c ?]. subst. + injection Hlit as <-. + unfold read_char in READ. destruct inp, next as [|c' next']; inversion Hsw. subst. + destruct char_match eqn:Hmatch; [discriminate|]. + now rewrite extract_literal_char_exact_char_match in Hmatch. + (* disjunction *) + - apply chain_literals_exact in Hlit as [p1 [p2 [Hact1 [Hact2 Hchain]]]]. subst. rewrite Hact2 in IHHtree1, IHHtree2. + apply merge_literals_exact in Hact1 as [[Hex1 Hex2] | [[Himp1 Hex2] | [Hex1 Himp2]]]. + + rewrite Hex1 in IHHtree1. + specialize (IHHtree1 ltac:(eauto) (p1 ++ p2)). boolprop. repeat specialize_prove IHHtree1 by eauto. + destruct IHHtree1 as [? IHHtree1]. + erewrite IHHtree1. simpl. eauto. + + erewrite extract_literal_impossible_general; [simpl|eauto|simpl; now rewrite Himp1]. + rewrite Hex2 in IHHtree2. + eapply IHHtree2; boolprop; eauto. + + rewrite Hex1 in IHHtree1. + specialize (IHHtree1 ltac:(eauto) (p1 ++ p2)). boolprop. repeat specialize_prove IHHtree1 by eauto. + destruct IHHtree1 as [? IHHtree1]. + erewrite IHHtree1. simpl. eauto. + (* sequence *) + - rewrite <-chain_literals_assoc in Hlit. + eapply IHHtree; boolprop; eauto. + (* tree_quant_forced *) + - destruct plus as [[|n]|]. + + rewrite <-chain_literals_assoc in Hlit. + eapply IHHtree; boolprop; eauto. + + destruct (extract_literal r1), extract_actions_literal, repeat_literal; try easy. + simpl in Hlit. rewrite <-app_assoc in Hlit. + eapply IHHtree; boolprop; eauto. + + destruct (extract_literal r1), extract_actions_literal, repeat_literal; try easy. + simpl in Hlit. rewrite <-app_assoc in Hlit. + eapply IHHtree; boolprop; eauto. + (* tree_quant_free *) + - destruct plus as [[|n]|]; now destruct extract_actions_literal. +Qed. + +Lemma exact_literal_result : + forall r tree inp gm p, + is_tree rer [Areg r] inp gm forward tree -> + has_asserts r = false -> + extract_literal r = Exact p -> + starts_with p (next_str inp) -> + (exists gm', tree_res tree gm inp forward = Some (advance_input_n inp (length p) forward, gm')). +Proof. + intros. + eapply exact_literal_result_general; eauto; simpl. + - now boolprop. + - destruct extract_literal; simpl; now rewrite ?app_nil_r. +Qed. + +(* exact_literal_result_unanchored with extra premises to guide the direction *) +(* of the input we induct on *) +Lemma exact_literal_result_unanchored' {strs:StrSearch}: + forall r i inp inp' p tree, + input_prefix i inp forward -> + input_prefix inp' i forward -> + is_tree rer [Areg (lazy_prefix r)] i Groups.GroupMap.empty forward tree -> + has_asserts r = false -> + extract_literal r = Exact p -> + input_search p i = Some inp -> + exists gm', first_leaf tree i = Some (advance_input_n inp (length p) forward, gm'). +Proof. + intros r i inp inp' p tree Hhigh. + remember forward as dir. + generalize dependent tree. + induction Hhigh; subst; intros tree Hlow Htree Hnoassert Hlit Hsearch. + - inversion Htree. inversion CONT. subst. + eapply input_search_starts_with in Hsearch. + unfold first_leaf. simpl. + eapply exact_literal_result with (tree:=tskip) in Hsearch as [? ->]; simpl; eauto. + - (* relate inputs *) + assert (Hinp31: strict_suffix inp3 inp1 forward) by ss_solve. + destruct inp1 as [next1 pref1], next1 as [|c1 next1]; [discriminate|injection H as <-]. + inversion Htree. inversion CONT. destruct plus; [discriminate|subst]. + inversion ISTREE1; [inversion READ; subst|discriminate]. + (* r has no results at inp1 *) + eapply input_search_no_earlier with (i:=Input (c::next1) pref1) in Hsearch as Hres; try split; eauto. + replace p with (prefix (extract_literal r)) in Hres by now rewrite Hlit. + eapply extract_literal_prefix_contra in Hres; eauto. + (* the rest of the result comes from IH *) + pose proof (is_tree_productivity rer [Areg (lazy_prefix r)] (Input next1 (c :: pref1)) Groups.GroupMap.empty forward) as [t' Ht']. + assert (Hinp': input_prefix inp' (Input next1 (c :: pref1)) forward) by ss_solve. + assert (Hsearch': input_search p (Input next1 (c::pref1)) = Some inp3). { + eapply input_search_advance with (inp1:=Input (c::next1) pref1); eauto. + } + specialize (IHHhigh ltac:(eauto) _ ltac:(eauto) Ht' ltac:(eauto) ltac:(eauto) ltac:(eauto)) as [gm' Hres']. + unfold first_leaf in *. simpl. unfold advance_input'. + rewrite Hres. simpl. + inversion TREECONT; [|exfalso; eauto using ss_advance]. + inversion Ht'. + eapply is_tree_determ with (t2:=treecont) in CONT0; eauto. + subst. eauto. +Qed. + +(* the result of a match of a the lazy prefix of an exact regex with no assertions *) +(* is the position returned from a substring search *) +Lemma exact_literal_result_unanchored {strs:StrSearch} : + forall r inp p inp' tree, + has_asserts r = false -> + extract_literal r = Exact p -> + is_tree rer [Areg (lazy_prefix r)] inp Groups.GroupMap.empty forward tree -> + input_search p inp = Some inp' -> + exists gm', first_leaf tree inp = Some (advance_input_n inp' (length p) forward, gm'). +Proof. + intros. + eapply input_search_strict_suffix in H2 as Hstr. + eapply exact_literal_result_unanchored'; eauto using ip_eq; ss_solve. +Qed. + (** * Extracted literals size *) Lemma chain_literals_length: diff --git a/Semantics/Chars.v b/Semantics/Chars.v index e10826e..6853310 100644 --- a/Semantics/Chars.v +++ b/Semantics/Chars.v @@ -1,5 +1,7 @@ From Stdlib Require Import List Lia. Import ListNotations. + +From Linden Require Import ListLemmas. From Linden Require Import Utils Parameters LWParameters. Import Utils.List. From Warblre Require Import Base Typeclasses RegExpRecord Semantics Result Errors. @@ -263,6 +265,14 @@ Section Chars. intros i dir nexti H. unfold advance_input'. rewrite H. reflexivity. Qed. + Lemma advance_input_not_self: + forall inp dir, + ~(advance_input inp dir = Some inp). + Proof. + intros [next pref] dir H. + destruct dir; [destruct next|destruct pref]; inversion H; eapply cons_different; eauto. + Qed. + (* Advancing input several times *) Definition advance_input_n (i: input) (n: nat) (dir: Direction): input := match i with diff --git a/Semantics/FunctionalSemantics.v b/Semantics/FunctionalSemantics.v index d2d18c6..2d6516e 100644 --- a/Semantics/FunctionalSemantics.v +++ b/Semantics/FunctionalSemantics.v @@ -319,6 +319,13 @@ Section FunctionalSemantics. intros [next pref] dir. simpl. now destruct dir. Qed. + (* advancing a cons input by n+1 is the same as advancing its tail by n *) + Lemma advance_input_n_succ_forward: + forall c next pref n, advance_input_n (Input (c :: next) pref) (S n) forward = advance_input_n (Input next (c::pref)) n forward. + Proof. + intros. simpl. now rewrite <-app_assoc. + Qed. + (* May be used to simplify the lemma right after this one *) Lemma skipn_cons_length {A}: forall n (l: list A) x q, diff --git a/Semantics/StrictSuffix.v b/Semantics/StrictSuffix.v index 8f69721..66eab2b 100644 --- a/Semantics/StrictSuffix.v +++ b/Semantics/StrictSuffix.v @@ -1,4 +1,5 @@ From Linden Require Import Chars Parameters LWParameters. +From Linden Require Import ListLemmas. From Warblre Require Import Base Parameters. From Stdlib Require Import List Lia. Import ListNotations. @@ -389,6 +390,56 @@ Section StrictSuffix. pose proof strict_suffix_current inp1 inp1 dir Hss. lia. Qed. + Lemma ss_irreflexive : + forall inp dir, + ~strict_suffix inp inp dir. + Proof. + intros inp dir H. + now apply ss_neq in H. + Qed. + + Lemma ss_asymm: + forall inp1 inp2 dir, + strict_suffix inp1 inp2 dir -> + ~strict_suffix inp2 inp1 dir. + Proof. + intros inp1 inp2 dir H1 H2. + apply (ss_irreflexive inp1 dir). + eapply strict_suffix_trans; eauto. + Qed. + + + Lemma ss_forward_backward : + forall inp1 inp2, + strict_suffix inp1 inp2 forward -> strict_suffix inp2 inp1 backward. + Proof. + remember forward as dir. + induction 1; subst. + - destruct inp as [next ?], next; [discriminate|injection H as <-]. + eauto using ss_advance. + - destruct inp2 as [next ?], next; [discriminate|injection H as <-]. + eauto using ss_next'. + Qed. + + Lemma ss_backward_forward : + forall inp1 inp2, + strict_suffix inp1 inp2 backward -> strict_suffix inp2 inp1 forward. + Proof. + remember backward as dir. + induction 1; subst. + - destruct inp as [? pref], pref; [discriminate|injection H as <-]. + eauto using ss_advance. + - destruct inp2 as [? pref], pref; [discriminate|injection H as <-]. + eauto using ss_next'. + Qed. + + Lemma ss_backward_forward_iff : + forall inp1 inp2, + strict_suffix inp1 inp2 backward <-> strict_suffix inp2 inp1 forward. + Proof. + intros. split; eauto using ss_forward_backward, ss_backward_forward. + Qed. + (** * Prefixes *) (* In general you should use strict_suffix in definitions. *) @@ -426,3 +477,59 @@ Section StrictSuffix. Qed. End StrictSuffix. + +(** Solver for strict_suffix *) + +(* canonicalize into strict suffixes *) +Ltac ss_canon := + match goal with + (* generate strict_suffix *) + | |- input_prefix _ _ _ => + rewrite input_prefix_strict_suffix + | [H: input_prefix _ _ _ |- _] => + rewrite input_prefix_strict_suffix in H; + destruct H; subst + (* simplify advance_input *) + | [H: advance_input ?inp1 forward = Some _ |- _] => + let next := fresh "next" in + destruct inp1 as [next ?pref], next as [|?c ?next]; [discriminate H|]; + inversion H; clear H; subst + | [H: advance_input ?inp1 backward = Some _ |- _] => + let pref := fresh "pref" in + destruct inp1 as [?next pref], pref as [|?c ?pref]; [discriminate H|]; + inversion H; clear H; subst + (* flip backward to forward *) + | [H: strict_suffix _ _ backward |- _] => + rewrite ss_backward_forward_iff in H + | |- strict_suffix _ _ backward => + rewrite ss_backward_forward_iff + | [H: strict_suffix _ _ forward |- _] => idtac + | [H: strict_suffix _ _ ?dir |- _] => destruct dir + end. + +(* solves a strict_suffix goal by finding a proof *) +(* or by finding a contradiction *) +(* assumes statements are canonicalized with ss_canon *) +Ltac ss_solve' := + match goal with + (* solve directly *) + | [H: ?h |- ?h] => eassumption + | [H: advance_input ?inp forward = Some ?inp |- _] => exfalso; eapply advance_input_not_self; eauto + | [H: strict_suffix ?inp ?inp _ |- _] => exfalso; eapply ss_irreflexive; eauto + | [H: strict_suffix ?inp1 ?inp2 forward |- _] => + let H' := fresh "H" in + assert (H': strict_suffix inp2 inp1 forward) by (clear H; ss_solve'); + exfalso; apply (ss_asymm inp1 inp2 forward H H') + | |- strict_suffix (Input ?next (?c::?pref)) (Input (?c::?next) ?pref) forward => + eapply ss_advance; eauto + | [H: _ :: ?t = ?t |- _] => + exfalso; eapply cons_different; eauto + (* find goal transitively *) + | |- _ \/ _ => (left; ss_solve') || (right; ss_solve') + | [H: strict_suffix _ ?inp3 ?dir |- strict_suffix ?inp1 ?inp3 ?dir] => + eapply strict_suffix_trans with (2:=H); + ss_solve' + | _ => solve[eauto using ss_next', ss_advance] + end. + +Ltac ss_solve := solve[repeat ss_canon; ss_solve']. diff --git a/WarblreEquiv/ListLemmas.v b/Utils/ListLemmas.v similarity index 93% rename from WarblreEquiv/ListLemmas.v rename to Utils/ListLemmas.v index 5c90b1c..b70d017 100644 --- a/WarblreEquiv/ListLemmas.v +++ b/Utils/ListLemmas.v @@ -53,3 +53,10 @@ Proof. rewrite skipn_all in Hconcat. now replace (Z.to_nat endInd1) with (length pref) by lia. Qed. + +Lemma cons_different {A}: forall (x: A) (l: list A), l <> x::l. +Proof. + induction l. + - discriminate. + - now inversion 1. +Qed.