From 10bf8f97dd3722837967b0f1108144f15e9fe3f9 Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Sat, 3 May 2025 16:26:06 +0200
Subject: [PATCH 01/12] Add (partial )solution to HumanEval160

---
 HumanEvalLean/HumanEval160.lean | 133 +++++++++++++++++++++++++++++---
 1 file changed, 123 insertions(+), 10 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index fae4324..e983d40 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -1,5 +1,118 @@
-def do_algebra : Unit :=
-  ()
+inductive Op where
+  | add
+  | sub
+  | mul
+  | div
+  | pow
+
+namespace Op
+
+instance : ToString Op where
+  toString
+    | add => "+"
+    | sub => "-"
+    | mul => "*"
+    | div => "/"
+    | pow => "^"
+
+def prio : Op → Nat
+  | add | sub => 1
+  | mul | div => 2
+  | pow       => 3
+
+def leftAssoc : Op → Bool
+  | add | sub => true
+  | mul | div => true
+  | pow       => false
+
+def interpret : Op → (Nat → Nat → Nat)
+  | add => (· + ·)
+  | sub => (· - ·)
+  | mul => (· * ·)
+  | div => (· / ·)
+  | pow => (· ^ ·)
+
+end Op
+
+inductive Expr where
+  | lit (n : Nat)
+  | app (op : Op) (arg₁ arg₂ : Expr)
+  deriving Inhabited
+
+namespace Expr
+
+instance : ToString Expr where
+  toString :=
+    let rec go
+      | .lit n            => s!"{n}"
+      | .app op arg₁ arg₂ => s!"({go arg₁} {op} {go arg₂})"
+    go
+
+def eval : Expr → Nat
+  | lit n            => n
+  | app op arg₁ arg₂ => op.interpret arg₁.eval arg₂.eval
+
+structure ParseState where
+  ops    : List Op   := []
+  args   : List Nat  := []
+  hold   : List Op   := []
+  output : List Expr := []
+  deriving Inhabited
+
+namespace ParseState
+
+def pushArg (σ : ParseState) (arg : Nat) : ParseState :=
+  { σ with output := (lit arg) :: σ.output }
+
+@[simp]
+theorem pushArg_ops (σ : ParseState) : (σ.pushArg arg).ops = σ.ops := rfl
+
+def pushOp (σ : ParseState) (op : Op) : ParseState :=
+  match σ.hold with
+  | [] => { σ with hold := [op] }
+  | top :: hold =>
+    match compare top.prio op.prio, top.leftAssoc with
+    | .lt, _ | .eq, false => { σ with hold := op :: top :: hold }
+    | .gt, _ | .eq, true =>
+      match σ.output with
+      | arg₂ :: arg₁ :: out => { σ with hold := op :: hold, output := .app top arg₁ arg₂ :: out }
+      | _                   => σ -- Trash value chosen to easily satisfy `pushOp_ops`.
+
+@[simp]
+theorem pushOp_ops (σ : ParseState) : (σ.pushOp op).ops = σ.ops := by
+  rw [pushOp]
+  repeat (split <;> try rfl)
+
+def finalize (σ : ParseState) : ParseState :=
+  match _ : σ.hold, σ.output with
+  | op :: hold, arg₂ :: arg₁ :: out => finalize { σ with hold, output := .app op arg₁ arg₂ :: out }
+  | _, _                            => σ
+  termination_by σ.hold
+  decreasing_by simp_all +arith
+
+end ParseState
+
+-- Parses an expression based on the "shunting yard algorithm".
+def parse! (ops : List Op) (args : List Nat) : Expr :=
+  go { ops, args } |>.output[0]!
+where
+  go (σ : ParseState) : ParseState :=
+    match _ : σ.ops, σ.args with
+    | op :: ops, arg :: args => { σ with ops, args }  |>.pushArg arg |>.pushOp op |> go
+    | [],        [arg]       => { σ with args := [] } |>.pushArg arg |>.finalize
+    | _,         _           => panic! "Invalid parse state"
+  termination_by σ.ops
+  decreasing_by simp_all +arith
+
+end Expr
+
+def doAlgebra (ops : List Op) (args : List Nat) : Nat :=
+  Expr.parse! ops args |>.eval
+
+example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = 9  := by native_decide
+example : doAlgebra [.pow, .mul, .add] [2, 3, 4, 5] = 37 := by native_decide
+example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = 9  := by native_decide
+example : doAlgebra [.div, .mul]       [7, 3, 4]    = 8  := by native_decide
 
 /-!
 ## Prompt
@@ -8,16 +121,16 @@ def do_algebra : Unit :=
 
 def do_algebra(operator, operand):
     """
-    Given two lists operator, and operand. The first list has basic algebra operations, and 
-    the second list is a list of integers. Use the two given lists to build the algebric 
+    Given two lists operator, and operand. The first list has basic algebra operations, and
+    the second list is a list of integers. Use the two given lists to build the algebric
     expression and return the evaluation of this expression.
 
     The basic algebra operations:
-    Addition ( + ) 
-    Subtraction ( - ) 
-    Multiplication ( * ) 
-    Floor division ( // ) 
-    Exponentiation ( ** ) 
+    Addition ( + )
+    Subtraction ( - )
+    Multiplication ( * )
+    Floor division ( // )
+    Exponentiation ( ** )
 
     Example:
     operator['+', '*', '-']
@@ -56,4 +169,4 @@ def check(candidate):
     assert True, "This prints if this assert fails 2 (also good for debugging!)"
 
 ```
--/
\ No newline at end of file
+-/

From 08d443fb336c573accd0aa11ac9d48fffc05d8e3 Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Sat, 10 May 2025 10:17:55 +0200
Subject: [PATCH 02/12] Redefine parsing to use optionals for failure

---
 HumanEvalLean/HumanEval160.lean | 66 ++++++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 21 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index e983d40..cefbca2 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -52,6 +52,14 @@ def eval : Expr → Nat
   | lit n            => n
   | app op arg₁ arg₂ => op.interpret arg₁.eval arg₂.eval
 
+def lits : Expr → List Nat
+  | lit n           => [n]
+  | app _ arg₁ arg₂ => arg₁.lits ++ arg₂.lits
+
+def apps : Expr → List Op
+  | lit _            => []
+  | app op arg₁ arg₂ => arg₁.apps ++ op :: arg₂.apps
+
 structure ParseState where
   ops    : List Op   := []
   args   : List Nat  := []
@@ -67,21 +75,25 @@ def pushArg (σ : ParseState) (arg : Nat) : ParseState :=
 @[simp]
 theorem pushArg_ops (σ : ParseState) : (σ.pushArg arg).ops = σ.ops := rfl
 
-def pushOp (σ : ParseState) (op : Op) : ParseState :=
+def pushOp? (σ : ParseState) (op : Op) : Option ParseState :=
   match σ.hold with
-  | [] => { σ with hold := [op] }
+  | [] => some { σ with hold := [op] }
   | top :: hold =>
     match compare top.prio op.prio, top.leftAssoc with
-    | .lt, _ | .eq, false => { σ with hold := op :: top :: hold }
+    | .lt, _ | .eq, false => some { σ with hold := op :: top :: hold }
     | .gt, _ | .eq, true =>
       match σ.output with
-      | arg₂ :: arg₁ :: out => { σ with hold := op :: hold, output := .app top arg₁ arg₂ :: out }
-      | _                   => σ -- Trash value chosen to easily satisfy `pushOp_ops`.
+      | arg₂ :: arg₁ :: out => some { σ with hold := op :: hold, output := .app top arg₁ arg₂ :: out }
+      | _                   => none
 
 @[simp]
-theorem pushOp_ops (σ : ParseState) : (σ.pushOp op).ops = σ.ops := by
-  rw [pushOp]
-  repeat (split <;> try rfl)
+theorem pushOp?_ops {σ : ParseState} (h : σ.pushOp? op = some σ') : σ'.ops = σ.ops := by
+  rw [pushOp?] at h
+  repeat' split at h
+  all_goals
+    first
+    | injection h with h; rw [←h]
+    | contradiction
 
 def finalize (σ : ParseState) : ParseState :=
   match _ : σ.hold, σ.output with
@@ -93,26 +105,38 @@ def finalize (σ : ParseState) : ParseState :=
 end ParseState
 
 -- Parses an expression based on the "shunting yard algorithm".
-def parse! (ops : List Op) (args : List Nat) : Expr :=
-  go { ops, args } |>.output[0]!
+def parse? (ops : List Op) (args : List Nat) : Option Expr :=
+  go { ops, args } >>= (·.output[0]?)
 where
-  go (σ : ParseState) : ParseState :=
+  go (σ : ParseState) : Option ParseState :=
     match _ : σ.ops, σ.args with
-    | op :: ops, arg :: args => { σ with ops, args }  |>.pushArg arg |>.pushOp op |> go
-    | [],        [arg]       => { σ with args := [] } |>.pushArg arg |>.finalize
-    | _,         _           => panic! "Invalid parse state"
+    | op :: ops, arg :: args =>
+      match _ : { σ with ops, args } |>.pushArg arg |>.pushOp? op with
+      | none   => none
+      | some σ => go σ
+    | [], [arg] => { σ with args := [] } |>.pushArg arg |>.finalize
+    | _, _  => none
   termination_by σ.ops
-  decreasing_by simp_all +arith
+  decreasing_by simp_all +arith [ParseState.pushOp?_ops ‹_›]
+
+theorem parse?.some_iff : (parse? ops args = some e) ↔ (args.length = ops.length + 1) := by
+  sorry
+
+theorem parse?.lits_eq_args (h : parse? ops args = some e) : e.lits = args := by
+  sorry
+
+theorem parse?.apps_eq_ops (h : parse? ops args = some e) : e.apps = ops := by
+  sorry
 
 end Expr
 
-def doAlgebra (ops : List Op) (args : List Nat) : Nat :=
-  Expr.parse! ops args |>.eval
+def doAlgebra (ops : List Op) (args : List Nat) : Option Nat :=
+  Expr.eval <$> Expr.parse? ops args
 
-example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = 9  := by native_decide
-example : doAlgebra [.pow, .mul, .add] [2, 3, 4, 5] = 37 := by native_decide
-example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = 9  := by native_decide
-example : doAlgebra [.div, .mul]       [7, 3, 4]    = 8  := by native_decide
+example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = some 9  := by native_decide
+example : doAlgebra [.pow, .mul, .add] [2, 3, 4, 5] = some 37 := by native_decide
+example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = some 9  := by native_decide
+example : doAlgebra [.div, .mul]       [7, 3, 4]    = some 8  := by native_decide
 
 /-!
 ## Prompt

From a2033fa82587d9f855ff86025eb0079a4d6588f0 Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Wed, 14 May 2025 20:50:02 +0200
Subject: [PATCH 03/12] Prove that successful parsing produces exactly one
 output element

---
 HumanEvalLean/HumanEval160.lean | 88 +++++++++++++++++++++++++--------
 1 file changed, 67 insertions(+), 21 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index cefbca2..13a3805 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -69,12 +69,10 @@ structure ParseState where
 
 namespace ParseState
 
+@[simp]
 def pushArg (σ : ParseState) (arg : Nat) : ParseState :=
   { σ with output := (lit arg) :: σ.output }
 
-@[simp]
-theorem pushArg_ops (σ : ParseState) : (σ.pushArg arg).ops = σ.ops := rfl
-
 def pushOp? (σ : ParseState) (op : Op) : Option ParseState :=
   match σ.hold with
   | [] => some { σ with hold := [op] }
@@ -95,37 +93,85 @@ theorem pushOp?_ops {σ : ParseState} (h : σ.pushOp? op = some σ') : σ'.ops =
     | injection h with h; rw [←h]
     | contradiction
 
+theorem pushOp?_output_hold_length
+    {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
+    σ₂.output.length - σ₂.hold.length = σ₁.output.length - σ₁.hold.length - 1 := by
+  rw [pushOp?] at hp
+  (repeat' split at hp) <;> injection hp
+  all_goals next hp => simp_all only [List.length_cons, ←hp]; omega
+
+theorem pushOp?_hold_length_le_output_length
+    {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
+    σ₂.hold.length ≤ σ₂.output.length := by
+  rw [pushOp?] at hp
+  (repeat' split at hp) <;> injection hp
+  all_goals next hp => simp_all +arith [←hp]
+
 def finalize (σ : ParseState) : ParseState :=
   match _ : σ.hold, σ.output with
-  | op :: hold, arg₂ :: arg₁ :: out => finalize { σ with hold, output := .app op arg₁ arg₂ :: out }
+  | op :: hold, arg₂ :: arg₁ :: out => finalize { σ with hold, output := app op arg₁ arg₂ :: out }
   | _, _                            => σ
   termination_by σ.hold
   decreasing_by simp_all +arith
 
+theorem finalize_output_length {σ : ParseState} (h : σ.hold.length < σ.output.length) :
+    σ.finalize.output.length = σ.output.length - σ.hold.length := by
+  replace ⟨ops, args, hold, output⟩ := σ
+  induction hold generalizing output <;> cases output <;> rw [finalize]
+  case cons.cons out => cases out <;> simp_all
+  all_goals simp_all
+
+def run (σ : ParseState) : Option ParseState :=
+  match _ : σ.ops, σ.args with
+  | op :: ops, arg :: args =>
+    match _ : { σ with ops, args } |>.pushArg arg |>.pushOp? op with
+    | none   => none
+    | some σ => run σ
+  | [], [arg] => { σ with args := [] } |>.pushArg arg |>.finalize
+  | _, _  => none
+termination_by σ.ops
+decreasing_by simp_all +arith [pushOp?_ops ‹_›]
+
+theorem run_output_length
+    {σ₁ : ParseState} (hr : σ₁.run = some σ₂) (hl : σ₁.hold.length ≤ σ₁.output.length) :
+    σ₂.output.length = σ₁.output.length + 1 - σ₁.hold.length := by
+  rw [run] at hr
+  repeat' split at hr
+  · contradiction
+  next σ' h =>
+    have : σ'.ops.length < σ₁.ops.length := by simp [pushOp?_ops h, *]
+    have hl := pushOp?_hold_length_le_output_length h <| by simp_all +arith
+    rw [run_output_length hr hl]
+    have := pushOp?_output_hold_length h <| by simp only [pushArg, List.length_cons]; omega
+    simp only [pushArg, List.length_cons] at this
+    omega
+  · injection hr with hr
+    rw [←hr, finalize_output_length]
+    repeat simp only [pushArg, List.length_cons]
+    omega
+  · contradiction
+termination_by σ₁.ops.length
+
+-- This implies that `parse?` either fails, or returns precisely the single output element.
+theorem run_output_singleton {ops args} (h : run { ops, args } = some σ) : σ.output.length = 1 :=
+  run_output_length h .refl
+
 end ParseState
 
 -- Parses an expression based on the "shunting yard algorithm".
 def parse? (ops : List Op) (args : List Nat) : Option Expr :=
-  go { ops, args } >>= (·.output[0]?)
-where
-  go (σ : ParseState) : Option ParseState :=
-    match _ : σ.ops, σ.args with
-    | op :: ops, arg :: args =>
-      match _ : { σ with ops, args } |>.pushArg arg |>.pushOp? op with
-      | none   => none
-      | some σ => go σ
-    | [], [arg] => { σ with args := [] } |>.pushArg arg |>.finalize
-    | _, _  => none
-  termination_by σ.ops
-  decreasing_by simp_all +arith [ParseState.pushOp?_ops ‹_›]
-
-theorem parse?.some_iff : (parse? ops args = some e) ↔ (args.length = ops.length + 1) := by
-  sorry
+  match h : { ops, args : ParseState }.run with
+  | none   => none
+  | some σ => σ.output[0]'(by simp [ParseState.run_output_singleton h])
+
+theorem parse?_isSome_iff : (parse? ops args).isSome ↔ (args.length = ops.length + 1) where
+  mp  := sorry
+  mpr := sorry
 
-theorem parse?.lits_eq_args (h : parse? ops args = some e) : e.lits = args := by
+theorem parse?_lits_eq_args (h : parse? ops args = some e) : e.lits = args := by
   sorry
 
-theorem parse?.apps_eq_ops (h : parse? ops args = some e) : e.apps = ops := by
+theorem parse?_apps_eq_ops (h : parse? ops args = some e) : e.apps = ops := by
   sorry
 
 end Expr

From 7ffaab8e4a0715abbec307b949773226ff7bc2dd Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Thu, 15 May 2025 20:55:27 +0200
Subject: [PATCH 04/12] Add `push?` for pushing an arg and an op

---
 HumanEvalLean/HumanEval160.lean | 54 +++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index 13a3805..64ee92e 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -81,11 +81,11 @@ def pushOp? (σ : ParseState) (op : Op) : Option ParseState :=
     | .lt, _ | .eq, false => some { σ with hold := op :: top :: hold }
     | .gt, _ | .eq, true =>
       match σ.output with
-      | arg₂ :: arg₁ :: out => some { σ with hold := op :: hold, output := .app top arg₁ arg₂ :: out }
+      | arg₂ :: arg₁ :: out => some { σ with hold := op :: hold, output := app top arg₁ arg₂ :: out }
       | _                   => none
 
 @[simp]
-theorem pushOp?_ops {σ : ParseState} (h : σ.pushOp? op = some σ') : σ'.ops = σ.ops := by
+theorem pushOp?_ops {σ₁ : ParseState} (h : σ₁.pushOp? op = some σ₂) : σ₁.ops = σ₂.ops := by
   rw [pushOp?] at h
   repeat' split at h
   all_goals
@@ -107,6 +107,23 @@ theorem pushOp?_hold_length_le_output_length
   (repeat' split at hp) <;> injection hp
   all_goals next hp => simp_all +arith [←hp]
 
+def push? (σ : ParseState) (arg : Nat) (op : Op) : Option ParseState :=
+  σ.pushArg arg |>.pushOp? op
+
+@[simp]
+theorem push?_ops {σ₁ : ParseState} (h : σ₁.push? arg op = some σ₂) : σ₁.ops = σ₂.ops := by
+  rw [push?, pushArg] at h
+  have := pushOp?_ops h
+  repeat simp_all only
+
+theorem push?_output_hold_length
+    {σ₁ : ParseState} (hp : σ₁.push? arg op = some σ₂) (hl : σ₁.hold.length ≤ σ₁.output.length) :
+    σ₁.output.length - σ₁.hold.length = σ₂.output.length - σ₂.hold.length := by
+  rw [push?, pushArg] at hp
+  have hh : σ₁.hold.length < σ₁.output.length + 1 := by omega
+  simp only [pushOp?_output_hold_length hp hh, List.length_cons]
+  omega
+
 def finalize (σ : ParseState) : ParseState :=
   match _ : σ.hold, σ.output with
   | op :: hold, arg₂ :: arg₁ :: out => finalize { σ with hold, output := app op arg₁ arg₂ :: out }
@@ -121,29 +138,28 @@ theorem finalize_output_length {σ : ParseState} (h : σ.hold.length < σ.output
   case cons.cons out => cases out <;> simp_all
   all_goals simp_all
 
-def run (σ : ParseState) : Option ParseState :=
+def run? (σ : ParseState) : Option ParseState :=
   match _ : σ.ops, σ.args with
   | op :: ops, arg :: args =>
-    match _ : { σ with ops, args } |>.pushArg arg |>.pushOp? op with
+    match _ : { σ with ops, args }.push? arg op with
     | none   => none
-    | some σ => run σ
+    | some σ => run? σ
   | [], [arg] => { σ with args := [] } |>.pushArg arg |>.finalize
   | _, _  => none
 termination_by σ.ops
-decreasing_by simp_all +arith [pushOp?_ops ‹_›]
+decreasing_by have := push?_ops ‹_›; simp_all +arith
 
-theorem run_output_length
-    {σ₁ : ParseState} (hr : σ₁.run = some σ₂) (hl : σ₁.hold.length ≤ σ₁.output.length) :
+theorem run?_output_length
+    {σ₁ : ParseState} (hr : σ₁.run? = some σ₂) (hl : σ₁.hold.length ≤ σ₁.output.length) :
     σ₂.output.length = σ₁.output.length + 1 - σ₁.hold.length := by
-  rw [run] at hr
+  rw [run?] at hr
   repeat' split at hr
   · contradiction
   next σ' h =>
-    have : σ'.ops.length < σ₁.ops.length := by simp [pushOp?_ops h, *]
-    have hl := pushOp?_hold_length_le_output_length h <| by simp_all +arith
-    rw [run_output_length hr hl]
-    have := pushOp?_output_hold_length h <| by simp only [pushArg, List.length_cons]; omega
-    simp only [pushArg, List.length_cons] at this
+    have : σ'.ops.length < σ₁.ops.length := by have := push?_ops h; simp_all
+    have hl' := pushOp?_hold_length_le_output_length h <| by simp_all +arith
+    have := push?_output_hold_length h hl
+    simp only [run?_output_length hr hl', pushArg, List.length_cons] at *
     omega
   · injection hr with hr
     rw [←hr, finalize_output_length]
@@ -152,17 +168,17 @@ theorem run_output_length
   · contradiction
 termination_by σ₁.ops.length
 
--- This implies that `parse?` either fails, or returns precisely the single output element.
-theorem run_output_singleton {ops args} (h : run { ops, args } = some σ) : σ.output.length = 1 :=
-  run_output_length h .refl
+-- This implies that `parse?` either fails, or returns precisely a single output element.
+theorem run?_output_singleton {ops args} (h : run? { ops, args } = some σ) : σ.output.length = 1 :=
+  run?_output_length h .refl
 
 end ParseState
 
 -- Parses an expression based on the "shunting yard algorithm".
 def parse? (ops : List Op) (args : List Nat) : Option Expr :=
-  match h : { ops, args : ParseState }.run with
+  match h : { ops, args : ParseState }.run? with
   | none   => none
-  | some σ => σ.output[0]'(by simp [ParseState.run_output_singleton h])
+  | some σ => σ.output[0]'(by simp [ParseState.run?_output_singleton h])
 
 theorem parse?_isSome_iff : (parse? ops args).isSome ↔ (args.length = ops.length + 1) where
   mp  := sorry

From c62f0fab4ab86fd55cc2b974c8c4f3bf67a7c79f Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Thu, 15 May 2025 21:16:21 +0200
Subject: [PATCH 05/12] Fix bug in the implementation of `pushOp?`

---
 HumanEvalLean/HumanEval160.lean | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index 64ee92e..aea3335 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -74,38 +74,49 @@ def pushArg (σ : ParseState) (arg : Nat) : ParseState :=
   { σ with output := (lit arg) :: σ.output }
 
 def pushOp? (σ : ParseState) (op : Op) : Option ParseState :=
-  match σ.hold with
+  match _ : σ.hold with
   | [] => some { σ with hold := [op] }
   | top :: hold =>
     match compare top.prio op.prio, top.leftAssoc with
     | .lt, _ | .eq, false => some { σ with hold := op :: top :: hold }
     | .gt, _ | .eq, true =>
       match σ.output with
-      | arg₂ :: arg₁ :: out => some { σ with hold := op :: hold, output := app top arg₁ arg₂ :: out }
+      | arg₂ :: arg₁ :: out => pushOp? { σ with hold, output := app top arg₁ arg₂ :: out } op
       | _                   => none
+termination_by σ.hold.length
+decreasing_by all_goals simp +arith [*]
 
 @[simp]
 theorem pushOp?_ops {σ₁ : ParseState} (h : σ₁.pushOp? op = some σ₂) : σ₁.ops = σ₂.ops := by
+  sorry
+  /-
   rw [pushOp?] at h
   repeat' split at h
   all_goals
     first
     | injection h with h; rw [←h]
     | contradiction
+  -/
 
 theorem pushOp?_output_hold_length
     {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
     σ₂.output.length - σ₂.hold.length = σ₁.output.length - σ₁.hold.length - 1 := by
+  sorry
+  /-
   rw [pushOp?] at hp
   (repeat' split at hp) <;> injection hp
   all_goals next hp => simp_all only [List.length_cons, ←hp]; omega
+  -/
 
 theorem pushOp?_hold_length_le_output_length
     {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
     σ₂.hold.length ≤ σ₂.output.length := by
+  sorry
+  /-
   rw [pushOp?] at hp
   (repeat' split at hp) <;> injection hp
   all_goals next hp => simp_all +arith [←hp]
+  -/
 
 def push? (σ : ParseState) (arg : Nat) (op : Op) : Option ParseState :=
   σ.pushArg arg |>.pushOp? op
@@ -124,6 +135,11 @@ theorem push?_output_hold_length
   simp only [pushOp?_output_hold_length hp hh, List.length_cons]
   omega
 
+theorem push?_output_length_eq_hold_length
+    {σ₁ : ParseState} (hp : σ₁.push? arg op = some σ₂) (he : σ₁.hold.length = σ₁.output.length) :
+    σ₂.output.length = σ₂.hold.length := by
+  sorry -- TODO: Not sure if this is actually provable.
+
 def finalize (σ : ParseState) : ParseState :=
   match _ : σ.hold, σ.output with
   | op :: hold, arg₂ :: arg₁ :: out => finalize { σ with hold, output := app op arg₁ arg₂ :: out }

From 4aeb25c32b70f1ccd123c92a361511f59af3a9f4 Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Thu, 15 May 2025 21:34:38 +0200
Subject: [PATCH 06/12] Recover broken proofs

---
 HumanEvalLean/HumanEval160.lean | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index aea3335..f13c3a5 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -88,35 +88,42 @@ decreasing_by all_goals simp +arith [*]
 
 @[simp]
 theorem pushOp?_ops {σ₁ : ParseState} (h : σ₁.pushOp? op = some σ₂) : σ₁.ops = σ₂.ops := by
-  sorry
-  /-
   rw [pushOp?] at h
   repeat' split at h
   all_goals
     first
     | injection h with h; rw [←h]
+    | rw [pushOp?_ops h]
     | contradiction
-  -/
+termination_by σ₁.hold
+decreasing_by all_goals simp +arith [*]
 
 theorem pushOp?_output_hold_length
     {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
     σ₂.output.length - σ₂.hold.length = σ₁.output.length - σ₁.hold.length - 1 := by
-  sorry
-  /-
   rw [pushOp?] at hp
-  (repeat' split at hp) <;> injection hp
-  all_goals next hp => simp_all only [List.length_cons, ←hp]; omega
-  -/
+  repeat' split at hp <;> try contradiction
+  all_goals
+    first
+      | injection hp with hp
+      | have hp := pushOp?_output_hold_length hp
+    simp_all only [List.length_cons, ←hp]
+    omega
+termination_by σ₁.hold
+decreasing_by all_goals simp +arith [*]
 
 theorem pushOp?_hold_length_le_output_length
     {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
     σ₂.hold.length ≤ σ₂.output.length := by
-  sorry
-  /-
   rw [pushOp?] at hp
-  (repeat' split at hp) <;> injection hp
-  all_goals next hp => simp_all +arith [←hp]
-  -/
+  repeat' split at hp
+  all_goals
+    first
+      | contradiction
+      | injection hp with hp; simp_all +arith [←hp]
+      | exact pushOp?_hold_length_le_output_length hp (by simp_all)
+termination_by σ₁.hold
+decreasing_by all_goals simp +arith [*]
 
 def push? (σ : ParseState) (arg : Nat) (op : Op) : Option ParseState :=
   σ.pushArg arg |>.pushOp? op

From a8668df8eb9f66a35957d5deab41cdc35624e9bf Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Sun, 18 May 2025 12:48:15 +0200
Subject: [PATCH 07/12] Prove one direction of `parse?_some_iff`

---
 HumanEvalLean/HumanEval160.lean | 129 ++++++++++++++++++++------------
 1 file changed, 81 insertions(+), 48 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index f13c3a5..a776bd3 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -7,14 +7,6 @@ inductive Op where
 
 namespace Op
 
-instance : ToString Op where
-  toString
-    | add => "+"
-    | sub => "-"
-    | mul => "*"
-    | div => "/"
-    | pow => "^"
-
 def prio : Op → Nat
   | add | sub => 1
   | mul | div => 2
@@ -37,17 +29,9 @@ end Op
 inductive Expr where
   | lit (n : Nat)
   | app (op : Op) (arg₁ arg₂ : Expr)
-  deriving Inhabited
 
 namespace Expr
 
-instance : ToString Expr where
-  toString :=
-    let rec go
-      | .lit n            => s!"{n}"
-      | .app op arg₁ arg₂ => s!"({go arg₁} {op} {go arg₂})"
-    go
-
 def eval : Expr → Nat
   | lit n            => n
   | app op arg₁ arg₂ => op.interpret arg₁.eval arg₂.eval
@@ -65,10 +49,22 @@ structure ParseState where
   args   : List Nat  := []
   hold   : List Op   := []
   output : List Expr := []
-  deriving Inhabited
 
 namespace ParseState
 
+macro_rules
+  | `(tactic| decreasing_tactic) => `(tactic| simp +arith [*])
+
+structure Wellformed (σ : ParseState) where
+  external : σ.args.length = σ.ops.length + 1
+  internal : σ.hold.length = σ.output.length
+
+theorem Wellformed.of_external_cons
+    (wf : Wellformed σ) (ho : σ.ops = op :: os) (ha : σ.args = arg :: as) :
+    Wellformed { σ with ops := os, args := as } where
+  external := have ⟨_, _⟩ := wf; by simp_all
+  internal := wf.internal
+
 @[simp]
 def pushArg (σ : ParseState) (arg : Nat) : ParseState :=
   { σ with output := (lit arg) :: σ.output }
@@ -84,51 +80,60 @@ def pushOp? (σ : ParseState) (op : Op) : Option ParseState :=
       | arg₂ :: arg₁ :: out => pushOp? { σ with hold, output := app top arg₁ arg₂ :: out } op
       | _                   => none
 termination_by σ.hold.length
-decreasing_by all_goals simp +arith [*]
 
-@[simp]
+macro "pushOp?_cases " h:ident : tactic => `(tactic|(
+  rw [pushOp?] at $h:ident
+  repeat' split at $h:ident
+  all_goals try contradiction
+  all_goals try injection $h with $h
+  all_goals try rw [← $h]
+))
+
+theorem pushOp?_args {σ₁ : ParseState} (h : σ₁.pushOp? op = some σ₂) : σ₁.args = σ₂.args := by
+  pushOp?_cases h <;> rw [pushOp?_args h]
+termination_by σ₁.hold
+
 theorem pushOp?_ops {σ₁ : ParseState} (h : σ₁.pushOp? op = some σ₂) : σ₁.ops = σ₂.ops := by
-  rw [pushOp?] at h
-  repeat' split at h
-  all_goals
-    first
-    | injection h with h; rw [←h]
-    | rw [pushOp?_ops h]
-    | contradiction
+  pushOp?_cases h <;> rw [pushOp?_ops h]
 termination_by σ₁.hold
-decreasing_by all_goals simp +arith [*]
 
 theorem pushOp?_output_hold_length
     {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
     σ₂.output.length - σ₂.hold.length = σ₁.output.length - σ₁.hold.length - 1 := by
-  rw [pushOp?] at hp
-  repeat' split at hp <;> try contradiction
+  pushOp?_cases hp
   all_goals
-    first
-      | injection hp with hp
-      | have hp := pushOp?_output_hold_length hp
+    try have hp := pushOp?_output_hold_length hp
     simp_all only [List.length_cons, ←hp]
     omega
 termination_by σ₁.hold
-decreasing_by all_goals simp +arith [*]
 
 theorem pushOp?_hold_length_le_output_length
     {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
     σ₂.hold.length ≤ σ₂.output.length := by
-  rw [pushOp?] at hp
-  repeat' split at hp
+  pushOp?_cases hp
   all_goals
     first
-      | contradiction
-      | injection hp with hp; simp_all +arith [←hp]
       | exact pushOp?_hold_length_le_output_length hp (by simp_all)
+      | simp_all +arith [←hp]
+termination_by σ₁.hold
+
+theorem pushOp?_some {σ₁ : ParseState} (h : σ₁.hold.length < σ₁.output.length) :
+    ∃ σ₂, σ₁.pushOp? op = some σ₂ := by
+  rw [pushOp?]
+  repeat' split
+  all_goals try apply pushOp?_some
+  all_goals try cases _ : σ₁.output <;> cases ‹List Expr›
+  all_goals simp_all
 termination_by σ₁.hold
-decreasing_by all_goals simp +arith [*]
 
 def push? (σ : ParseState) (arg : Nat) (op : Op) : Option ParseState :=
   σ.pushArg arg |>.pushOp? op
 
-@[simp]
+theorem push?_args {σ₁ : ParseState} (h : σ₁.push? arg op = some σ₂) : σ₁.args = σ₂.args := by
+  rw [push?, pushArg] at h
+  have := pushOp?_args h
+  repeat simp_all only
+
 theorem push?_ops {σ₁ : ParseState} (h : σ₁.push? arg op = some σ₂) : σ₁.ops = σ₂.ops := by
   rw [push?, pushArg] at h
   have := pushOp?_ops h
@@ -144,15 +149,24 @@ theorem push?_output_hold_length
 
 theorem push?_output_length_eq_hold_length
     {σ₁ : ParseState} (hp : σ₁.push? arg op = some σ₂) (he : σ₁.hold.length = σ₁.output.length) :
-    σ₂.output.length = σ₂.hold.length := by
-  sorry -- TODO: Not sure if this is actually provable.
+    σ₂.hold.length = σ₂.output.length := by
+  simp only [push?, pushArg] at hp
+  have := push?_output_hold_length hp (Nat.le_of_eq he)
+  have := pushOp?_hold_length_le_output_length hp (by simp_all)
+  omega
+
+theorem push?_some (wf : Wellformed σ₁) (arg op) : ∃ σ₂, σ₁.push? arg op = some σ₂ :=
+  pushOp?_some <| by simp [wf.internal]
+
+theorem Wellformed.push? (hp : σ₁.push? arg op = some σ₂) (wf : Wellformed σ₁) : Wellformed σ₂ where
+  external := push?_args hp ▸ push?_ops hp ▸ wf.external
+  internal := push?_output_length_eq_hold_length hp wf.internal
 
 def finalize (σ : ParseState) : ParseState :=
   match _ : σ.hold, σ.output with
   | op :: hold, arg₂ :: arg₁ :: out => finalize { σ with hold, output := app op arg₁ arg₂ :: out }
   | _, _                            => σ
-  termination_by σ.hold
-  decreasing_by simp_all +arith
+termination_by σ.hold
 
 theorem finalize_output_length {σ : ParseState} (h : σ.hold.length < σ.output.length) :
     σ.finalize.output.length = σ.output.length - σ.hold.length := by
@@ -178,8 +192,8 @@ theorem run?_output_length
   rw [run?] at hr
   repeat' split at hr
   · contradiction
-  next σ' h =>
-    have : σ'.ops.length < σ₁.ops.length := by have := push?_ops h; simp_all
+  next ops₁ _ _ _ _  σ' h =>
+    have : σ'.ops.length ≤ ops₁.length := by have := push?_ops h; simp_all
     have hl' := pushOp?_hold_length_le_output_length h <| by simp_all +arith
     have := push?_output_hold_length h hl
     simp only [run?_output_length hr hl', pushArg, List.length_cons] at *
@@ -195,6 +209,22 @@ termination_by σ₁.ops.length
 theorem run?_output_singleton {ops args} (h : run? { ops, args } = some σ) : σ.output.length = 1 :=
   run?_output_length h .refl
 
+theorem run?_some (wf : Wellformed σ₁) : ∃ σ₂, σ₁.run? = some σ₂ := by
+  rw [run?]
+  repeat split
+  next op _ arg _ ho ha _ =>
+    have := push?_some (wf.of_external_cons ho ha) arg op
+    simp_all
+  next ops _ _ ho ha σ₂ h =>
+    have : σ₂.ops.length ≤ ops.length := by have := push?_ops h; simp_all
+    exact run?_some <| (wf.of_external_cons ho ha).push? h
+  · simp
+  next h _ =>
+    have ⟨_, _⟩ := wf
+    cases ho : σ₁.ops <;> cases ha : σ₁.args <;> try specialize h _ _ _ _ ho ha
+    all_goals simp_all
+termination_by σ₁.ops.length
+
 end ParseState
 
 -- Parses an expression based on the "shunting yard algorithm".
@@ -203,9 +233,12 @@ def parse? (ops : List Op) (args : List Nat) : Option Expr :=
   | none   => none
   | some σ => σ.output[0]'(by simp [ParseState.run?_output_singleton h])
 
-theorem parse?_isSome_iff : (parse? ops args).isSome ↔ (args.length = ops.length + 1) where
-  mp  := sorry
-  mpr := sorry
+theorem parse?_some_iff : (∃ e, parse? ops args = some e) ↔ (args.length = ops.length + 1) where
+  mp := sorry
+  mpr h := by
+    have := ParseState.run?_some (σ₁ := { ops, args }) ⟨h, rfl⟩
+    rw [parse?]
+    split <;> simp_all
 
 theorem parse?_lits_eq_args (h : parse? ops args = some e) : e.lits = args := by
   sorry

From 1d7392bc05c9f3c7b931ed160015d86b236a56c2 Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Sun, 18 May 2025 15:04:53 +0200
Subject: [PATCH 08/12] Complete proof of `parse?_some_iff`

---
 HumanEvalLean/HumanEval160.lean | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index a776bd3..75095c5 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -155,7 +155,7 @@ theorem push?_output_length_eq_hold_length
   have := pushOp?_hold_length_le_output_length hp (by simp_all)
   omega
 
-theorem push?_some (wf : Wellformed σ₁) (arg op) : ∃ σ₂, σ₁.push? arg op = some σ₂ :=
+theorem push?_wf (wf : Wellformed σ₁) (arg op) : ∃ σ₂, σ₁.push? arg op = some σ₂ :=
   pushOp?_some <| by simp [wf.internal]
 
 theorem Wellformed.push? (hp : σ₁.push? arg op = some σ₂) (wf : Wellformed σ₁) : Wellformed σ₂ where
@@ -209,15 +209,15 @@ termination_by σ₁.ops.length
 theorem run?_output_singleton {ops args} (h : run? { ops, args } = some σ) : σ.output.length = 1 :=
   run?_output_length h .refl
 
-theorem run?_some (wf : Wellformed σ₁) : ∃ σ₂, σ₁.run? = some σ₂ := by
+theorem run?_wf (wf : Wellformed σ₁) : ∃ σ₂, σ₁.run? = some σ₂ := by
   rw [run?]
   repeat split
   next op _ arg _ ho ha _ =>
-    have := push?_some (wf.of_external_cons ho ha) arg op
+    have := push?_wf (wf.of_external_cons ho ha) arg op
     simp_all
   next ops _ _ ho ha σ₂ h =>
     have : σ₂.ops.length ≤ ops.length := by have := push?_ops h; simp_all
-    exact run?_some <| (wf.of_external_cons ho ha).push? h
+    exact run?_wf <| (wf.of_external_cons ho ha).push? h
   · simp
   next h _ =>
     have ⟨_, _⟩ := wf
@@ -225,6 +225,20 @@ theorem run?_some (wf : Wellformed σ₁) : ∃ σ₂, σ₁.run? = some σ₂ :
     all_goals simp_all
 termination_by σ₁.ops.length
 
+theorem run?_some {σ₁ : ParseState} (h : σ₁.run? = some σ₂) :
+    σ₁.args.length = σ₁.ops.length + 1 := by
+  rw [run?] at h
+  repeat split at h
+  · contradiction
+  next ops _ _ _ _ σ₂ hp =>
+    have : σ₂.ops.length ≤ ops.length := by have := push?_ops hp; simp_all
+    have := run?_some h
+    have := push?_args hp
+    have := push?_ops hp
+    simp_all
+  all_goals simp_all
+termination_by σ₁.ops.length
+
 end ParseState
 
 -- Parses an expression based on the "shunting yard algorithm".
@@ -234,9 +248,14 @@ def parse? (ops : List Op) (args : List Nat) : Option Expr :=
   | some σ => σ.output[0]'(by simp [ParseState.run?_output_singleton h])
 
 theorem parse?_some_iff : (∃ e, parse? ops args = some e) ↔ (args.length = ops.length + 1) where
-  mp := sorry
+  mp h := by
+    replace ⟨_, h⟩ := h
+    rw [parse?] at h
+    split at h
+    · contradiction
+    next hr => simp_all [ParseState.run?_some hr]
   mpr h := by
-    have := ParseState.run?_some (σ₁ := { ops, args }) ⟨h, rfl⟩
+    have := ParseState.run?_wf (σ₁ := { ops, args }) ⟨h, rfl⟩
     rw [parse?]
     split <;> simp_all
 

From 764055925436b5a07c975aad2a3687e767bd4a9d Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Sun, 18 May 2025 22:50:33 +0200
Subject: [PATCH 09/12] Progress on `parse?_lits_eq_args`

---
 HumanEvalLean/HumanEval160.lean | 67 ++++++++++++++++++++++++++-------
 1 file changed, 54 insertions(+), 13 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index 75095c5..01a12b9 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -1,3 +1,7 @@
+theorem List.length_one_eq_getElem_zero {l : List α} (h : l.length = 1) : l = [l[0]] := by
+  cases l <;> try cases ‹List _›
+  all_goals simp at h ⊢
+
 inductive Op where
   | add
   | sub
@@ -52,6 +56,16 @@ structure ParseState where
 
 namespace ParseState
 
+def lits (σ : ParseState) : List Nat :=
+  go σ.output
+where
+  go : List Expr → List Nat
+  | []       => []
+  | e :: out => go out ++ e.lits
+
+theorem output_eq_lits_eq {σ₁ σ₂ : ParseState} (h : σ₁.output = σ₂.output) : σ₁.lits = σ₂.lits := by
+  simp only [lits, h]
+
 macro_rules
   | `(tactic| decreasing_tactic) => `(tactic| simp +arith [*])
 
@@ -97,6 +111,17 @@ theorem pushOp?_ops {σ₁ : ParseState} (h : σ₁.pushOp? op = some σ₂) : 
   pushOp?_cases h <;> rw [pushOp?_ops h]
 termination_by σ₁.hold
 
+theorem pushOp?_lits {σ₁ : ParseState} (h : σ₁.pushOp? op = some σ₂) : σ₁.lits = σ₂.lits := by
+  pushOp?_cases h
+  · have : σ₁.lits = σ₂.lits := output_eq_lits_eq (h ▸ rfl)
+    simp_all only
+  · have : σ₁.lits = σ₂.lits := output_eq_lits_eq (h ▸ rfl)
+    simp_all only
+  · have : σ₁.lits = σ₂.lits := output_eq_lits_eq (h ▸ rfl)
+    simp_all only
+  · sorry -- TODO: removing arg₂ arg₁ from the output and adding (app arg₁ arg₂) preserves the lits
+  · sorry -- TODO: removing arg₂ arg₁ from the output and adding (app arg₁ arg₂) preserves the lits
+
 theorem pushOp?_output_hold_length
     {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
     σ₂.output.length - σ₂.hold.length = σ₁.output.length - σ₁.hold.length - 1 := by
@@ -139,6 +164,9 @@ theorem push?_ops {σ₁ : ParseState} (h : σ₁.push? arg op = some σ₂) : 
   have := pushOp?_ops h
   repeat simp_all only
 
+theorem push?_lits {σ₁ : ParseState} (h : σ₁.push? arg op = some σ₂) : σ₂.lits = σ₁.lits ++ [arg] :=
+  pushOp?_lits h |>.symm
+
 theorem push?_output_hold_length
     {σ₁ : ParseState} (hp : σ₁.push? arg op = some σ₂) (hl : σ₁.hold.length ≤ σ₁.output.length) :
     σ₁.output.length - σ₁.hold.length = σ₂.output.length - σ₂.hold.length := by
@@ -186,6 +214,9 @@ def run? (σ : ParseState) : Option ParseState :=
 termination_by σ.ops
 decreasing_by have := push?_ops ‹_›; simp_all +arith
 
+theorem run?_lits {σ₁ : ParseState} (hr : σ₁.run? = some σ₂) : σ₂.lits = σ₁.lits ++ σ₁.args := by
+  sorry
+
 theorem run?_output_length
     {σ₁ : ParseState} (hr : σ₁.run? = some σ₂) (hl : σ₁.hold.length ≤ σ₁.output.length) :
     σ₂.output.length = σ₁.output.length + 1 - σ₁.hold.length := by
@@ -247,33 +278,43 @@ def parse? (ops : List Op) (args : List Nat) : Option Expr :=
   | none   => none
   | some σ => σ.output[0]'(by simp [ParseState.run?_output_singleton h])
 
+theorem parse?_some_to_run?_some (h : parse? ops args = some e) :
+    ∃ σ, { ops, args : ParseState }.run? = some σ := by
+  sorry
+
 theorem parse?_some_iff : (∃ e, parse? ops args = some e) ↔ (args.length = ops.length + 1) where
-  mp h := by
-    replace ⟨_, h⟩ := h
-    rw [parse?] at h
-    split at h
-    · contradiction
-    next hr => simp_all [ParseState.run?_some hr]
+  mp h := parse?_some_to_run?_some h.choose_spec |>.choose_spec |> ParseState.run?_some
   mpr h := by
     have := ParseState.run?_wf (σ₁ := { ops, args }) ⟨h, rfl⟩
     rw [parse?]
     split <;> simp_all
 
 theorem parse?_lits_eq_args (h : parse? ops args = some e) : e.lits = args := by
-  sorry
+  rw [parse?] at h
+  split at h
+  · contradiction
+  next σ hr =>
+    injection h with h
+    have hs := ParseState.run?_output_singleton hr
+    have : σ.output = [e] := h ▸ List.length_one_eq_getElem_zero hs
+    have := ParseState.run?_lits hr
+    simp_all [ParseState.lits, ParseState.lits.go]
 
 theorem parse?_apps_eq_ops (h : parse? ops args = some e) : e.apps = ops := by
   sorry
 
+def parse (ops : List Op) (args : List Nat) (h : args.length = ops.length + 1) : Expr :=
+  (parse? ops args).get <| Option.isSome_iff_exists.mpr <| parse?_some_iff.mpr h
+
 end Expr
 
-def doAlgebra (ops : List Op) (args : List Nat) : Option Nat :=
-  Expr.eval <$> Expr.parse? ops args
+def doAlgebra (ops : List Op) (args : List Nat) (h : args.length = ops.length + 1 := by decide) :=
+  Expr.parse ops args h |>.eval
 
-example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = some 9  := by native_decide
-example : doAlgebra [.pow, .mul, .add] [2, 3, 4, 5] = some 37 := by native_decide
-example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = some 9  := by native_decide
-example : doAlgebra [.div, .mul]       [7, 3, 4]    = some 8  := by native_decide
+example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = 9  := by native_decide
+example : doAlgebra [.pow, .mul, .add] [2, 3, 4, 5] = 37 := by native_decide
+example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = 9  := by native_decide
+example : doAlgebra [.div, .mul]       [7, 3, 4]    = 8  := by native_decide
 
 /-!
 ## Prompt

From eed839246033d9e655523468859219e6657686c4 Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Mon, 19 May 2025 17:42:15 +0200
Subject: [PATCH 10/12] Complete proof of `parse?_lits_eq_args`

---
 HumanEvalLean/HumanEval160.lean | 82 ++++++++++++++++++++-------------
 1 file changed, 51 insertions(+), 31 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index 01a12b9..3cd9034 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -113,14 +113,11 @@ termination_by σ₁.hold
 
 theorem pushOp?_lits {σ₁ : ParseState} (h : σ₁.pushOp? op = some σ₂) : σ₁.lits = σ₂.lits := by
   pushOp?_cases h
-  · have : σ₁.lits = σ₂.lits := output_eq_lits_eq (h ▸ rfl)
-    simp_all only
-  · have : σ₁.lits = σ₂.lits := output_eq_lits_eq (h ▸ rfl)
-    simp_all only
-  · have : σ₁.lits = σ₂.lits := output_eq_lits_eq (h ▸ rfl)
-    simp_all only
-  · sorry -- TODO: removing arg₂ arg₁ from the output and adding (app arg₁ arg₂) preserves the lits
-  · sorry -- TODO: removing arg₂ arg₁ from the output and adding (app arg₁ arg₂) preserves the lits
+  all_goals
+    first
+    | have : σ₁.lits = σ₂.lits := output_eq_lits_eq (h ▸ rfl); simp_all only
+    | rw [← pushOp?_lits h]; simp_all [Expr.lits, lits, lits.go]
+termination_by σ₁.hold
 
 theorem pushOp?_output_hold_length
     {σ₁ : ParseState} (hp : σ₁.pushOp? op = some σ₂) (hl : σ₁.hold.length < σ₁.output.length) :
@@ -128,7 +125,7 @@ theorem pushOp?_output_hold_length
   pushOp?_cases hp
   all_goals
     try have hp := pushOp?_output_hold_length hp
-    simp_all only [List.length_cons, ←hp]
+    simp_all only [List.length_cons, ← hp]
     omega
 termination_by σ₁.hold
 
@@ -139,7 +136,7 @@ theorem pushOp?_hold_length_le_output_length
   all_goals
     first
       | exact pushOp?_hold_length_le_output_length hp (by simp_all)
-      | simp_all +arith [←hp]
+      | simp_all +arith [← hp]
 termination_by σ₁.hold
 
 theorem pushOp?_some {σ₁ : ParseState} (h : σ₁.hold.length < σ₁.output.length) :
@@ -164,6 +161,9 @@ theorem push?_ops {σ₁ : ParseState} (h : σ₁.push? arg op = some σ₂) : 
   have := pushOp?_ops h
   repeat simp_all only
 
+macro_rules
+  | `(tactic| decreasing_tactic) => `(tactic| have := push?_ops ‹_›; simp_all +arith)
+
 theorem push?_lits {σ₁ : ParseState} (h : σ₁.push? arg op = some σ₂) : σ₂.lits = σ₁.lits ++ [arg] :=
   pushOp?_lits h |>.symm
 
@@ -196,6 +196,15 @@ def finalize (σ : ParseState) : ParseState :=
   | _, _                            => σ
 termination_by σ.hold
 
+@[simp]
+theorem finalize_lits (σ : ParseState) : σ.finalize.lits = σ.lits := by
+  rw [finalize]
+  split
+  · rw [finalize_lits]
+    simp_all [Expr.lits, lits, lits.go]
+  · rfl
+termination_by σ.hold
+
 theorem finalize_output_length {σ : ParseState} (h : σ.hold.length < σ.output.length) :
     σ.finalize.output.length = σ.output.length - σ.hold.length := by
   replace ⟨ops, args, hold, output⟩ := σ
@@ -212,28 +221,38 @@ def run? (σ : ParseState) : Option ParseState :=
   | [], [arg] => { σ with args := [] } |>.pushArg arg |>.finalize
   | _, _  => none
 termination_by σ.ops
-decreasing_by have := push?_ops ‹_›; simp_all +arith
+
+macro "run?_cases " h:ident : tactic => `(tactic|(
+  rw [run?] at $h:ident
+  repeat' split at $h:ident
+  all_goals try contradiction
+))
 
 theorem run?_lits {σ₁ : ParseState} (hr : σ₁.run? = some σ₂) : σ₂.lits = σ₁.lits ++ σ₁.args := by
-  sorry
+  run?_cases hr
+  next h =>
+    rw [run?_lits hr]
+    have := push?_lits h
+    have := push?_args h
+    simp_all [lits]
+  · injection hr with hr
+    rw [← hr, finalize_lits]
+    simp_all [Expr.lits, lits, lits.go]
+termination_by σ₁.ops.length
 
 theorem run?_output_length
     {σ₁ : ParseState} (hr : σ₁.run? = some σ₂) (hl : σ₁.hold.length ≤ σ₁.output.length) :
     σ₂.output.length = σ₁.output.length + 1 - σ₁.hold.length := by
-  rw [run?] at hr
-  repeat' split at hr
-  · contradiction
+  run?_cases hr
   next ops₁ _ _ _ _  σ' h =>
-    have : σ'.ops.length ≤ ops₁.length := by have := push?_ops h; simp_all
     have hl' := pushOp?_hold_length_le_output_length h <| by simp_all +arith
     have := push?_output_hold_length h hl
     simp only [run?_output_length hr hl', pushArg, List.length_cons] at *
     omega
   · injection hr with hr
-    rw [←hr, finalize_output_length]
+    rw [← hr, finalize_output_length]
     repeat simp only [pushArg, List.length_cons]
     omega
-  · contradiction
 termination_by σ₁.ops.length
 
 -- This implies that `parse?` either fails, or returns precisely a single output element.
@@ -246,8 +265,7 @@ theorem run?_wf (wf : Wellformed σ₁) : ∃ σ₂, σ₁.run? = some σ₂ :=
   next op _ arg _ ho ha _ =>
     have := push?_wf (wf.of_external_cons ho ha) arg op
     simp_all
-  next ops _ _ ho ha σ₂ h =>
-    have : σ₂.ops.length ≤ ops.length := by have := push?_ops h; simp_all
+  next ho ha _ h =>
     exact run?_wf <| (wf.of_external_cons ho ha).push? h
   · simp
   next h _ =>
@@ -256,18 +274,15 @@ theorem run?_wf (wf : Wellformed σ₁) : ∃ σ₂, σ₁.run? = some σ₂ :=
     all_goals simp_all
 termination_by σ₁.ops.length
 
-theorem run?_some {σ₁ : ParseState} (h : σ₁.run? = some σ₂) :
+theorem run?_some {σ₁ : ParseState} (hr : σ₁.run? = some σ₂) :
     σ₁.args.length = σ₁.ops.length + 1 := by
-  rw [run?] at h
-  repeat split at h
-  · contradiction
-  next ops _ _ _ _ σ₂ hp =>
-    have : σ₂.ops.length ≤ ops.length := by have := push?_ops hp; simp_all
-    have := run?_some h
-    have := push?_args hp
-    have := push?_ops hp
+  run?_cases hr
+  next ops _ _ _ _ σ₂ h =>
+    have := run?_some hr
+    have := push?_args h
+    have := push?_ops h
     simp_all
-  all_goals simp_all
+  · simp_all
 termination_by σ₁.ops.length
 
 end ParseState
@@ -280,7 +295,8 @@ def parse? (ops : List Op) (args : List Nat) : Option Expr :=
 
 theorem parse?_some_to_run?_some (h : parse? ops args = some e) :
     ∃ σ, { ops, args : ParseState }.run? = some σ := by
-  sorry
+  rw [parse?] at h
+  split at h <;> simp_all
 
 theorem parse?_some_iff : (∃ e, parse? ops args = some e) ↔ (args.length = ops.length + 1) where
   mp h := parse?_some_to_run?_some h.choose_spec |>.choose_spec |> ParseState.run?_some
@@ -306,6 +322,10 @@ theorem parse?_apps_eq_ops (h : parse? ops args = some e) : e.apps = ops := by
 def parse (ops : List Op) (args : List Nat) (h : args.length = ops.length + 1) : Expr :=
   (parse? ops args).get <| Option.isSome_iff_exists.mpr <| parse?_some_iff.mpr h
 
+theorem parse?_eq_parse (h : parse? ops args = some e) :
+    e = parse ops args (parse?_some_iff.mp ⟨e, h⟩) := by
+  simp [parse, h]
+
 end Expr
 
 def doAlgebra (ops : List Op) (args : List Nat) (h : args.length = ops.length + 1 := by decide) :=

From b455a4353190de8b4871fb94f39fe64871fda8c5 Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Sun, 17 Aug 2025 16:26:21 +0200
Subject: [PATCH 11/12] Define parser correctness

---
 HumanEvalLean/HumanEval160.lean | 124 +++++++++++++++++++++++---------
 lean-toolchain                  |   2 +-
 2 files changed, 90 insertions(+), 36 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index 3cd9034..d78a245 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -9,17 +9,42 @@ inductive Op where
   | div
   | pow
 
-namespace Op
+structure Input where
+  ops  : List Op  := []
+  args : List Nat := []
+
+abbrev Priority := Nat
 
-def prio : Op → Nat
-  | add | sub => 1
-  | mul | div => 2
-  | pow       => 3
+namespace Op
 
-def leftAssoc : Op → Bool
-  | add | sub => true
-  | mul | div => true
-  | pow       => false
+def prio : Op → Priority
+  | add | sub => 0
+  | mul | div => 1
+  | pow       => 2
+
+inductive Associativity where
+  | left
+  | right
+
+def assoc : Op → Associativity
+  | add | sub => .left
+  | mul | div => .left
+  | pow       => .right
+
+inductive Side where
+  | left
+  | right
+
+-- Given an `op` and another expression of priority `other` appearing on a given `side` of `op`,
+-- this function indicates whether `op`'s parsing should occur after that of `other`. For example,
+-- if `op` is `+` and `other` appears to the `.right` with a priority of `3`, then `other` should be
+-- parsed before `op`. If however `other` had a priority of `1`, then `op` should be parsed before.
+def after (op : Op) (other : Priority) (side : Side := .right) : Bool :=
+  match op.assoc, side with
+  | .left,  .left  => op.prio ≤ other
+  | .left,  .right => op.prio < other
+  | .right, .left  => op.prio < other
+  | .right, .right => op.prio ≤ other
 
 def interpret : Op → (Nat → Nat → Nat)
   | add => (· + ·)
@@ -48,14 +73,43 @@ def apps : Expr → List Op
   | lit _            => []
   | app op arg₁ arg₂ => arg₁.apps ++ op :: arg₂.apps
 
-structure ParseState where
-  ops    : List Op   := []
-  args   : List Nat  := []
+def prio : Expr → Priority
+  | lit _     => 3
+  | app op .. => op.prio
+
+-- An expression is lawful if its structure adheres to the operator priorities (`Op.prio`) and
+-- associativity rules (`Op.assoc`).
+inductive Lawful : Expr → Prop where
+  | lit (n : Nat) : Lawful (lit n)
+  | app (h₁ : op.after arg₁.prio .left) (h₂ : op.after arg₂.prio .right) :
+      Lawful arg₁ → Lawful arg₂ → Lawful (app op arg₁ arg₂)
+
+-- A given expression is equivalent to a list of operators and arguments if the expression contains
+-- the same symbols when read from left to right while also being well-structured (`lawful`).
+structure Equivalent (e : Expr) (inp : Input) where
+  ops    : e.apps = inp.ops
+  args   : e.lits = inp.args
+  lawful : Lawful e
+
+infix:50 " ≈ " => Equivalent
+
+abbrev Parser := Input → Option Expr
+
+-- This defines what it means for a parser to be correct: It produces a result iff the input is
+-- wellformed (`success`), and when it produces a result it is equivalent to the input (`equiv`).
+structure Parser.Correct (par : Parser) where
+  success : (∃ e, par inp = some e) ↔ (inp.args.length = inp.ops.length + 1)
+  equiv   : (par inp = some e) → e ≈ inp
+
+structure ParseState extends Input where
   hold   : List Op   := []
   output : List Expr := []
 
 namespace ParseState
 
+instance : Coe Input ParseState where
+  coe inp := { inp with }
+
 def lits (σ : ParseState) : List Nat :=
   go σ.output
 where
@@ -87,12 +141,10 @@ def pushOp? (σ : ParseState) (op : Op) : Option ParseState :=
   match _ : σ.hold with
   | [] => some { σ with hold := [op] }
   | top :: hold =>
-    match compare top.prio op.prio, top.leftAssoc with
-    | .lt, _ | .eq, false => some { σ with hold := op :: top :: hold }
-    | .gt, _ | .eq, true =>
-      match σ.output with
-      | arg₂ :: arg₁ :: out => pushOp? { σ with hold, output := app top arg₁ arg₂ :: out } op
-      | _                   => none
+    match top.after op.prio, σ.output with
+    | true,  _                   => some { σ with hold := op :: top :: hold }
+    | false, arg₂ :: arg₁ :: out => pushOp? { σ with hold, output := app top arg₁ arg₂ :: out } op
+    | false, _                   => none
 termination_by σ.hold.length
 
 macro "pushOp?_cases " h:ident : tactic => `(tactic|(
@@ -207,7 +259,7 @@ termination_by σ.hold
 
 theorem finalize_output_length {σ : ParseState} (h : σ.hold.length < σ.output.length) :
     σ.finalize.output.length = σ.output.length - σ.hold.length := by
-  replace ⟨ops, args, hold, output⟩ := σ
+  replace { ops, args, hold, output } := σ
   induction hold generalizing output <;> cases output <;> rw [finalize]
   case cons.cons out => cases out <;> simp_all
   all_goals simp_all
@@ -247,7 +299,7 @@ theorem run?_output_length
   next ops₁ _ _ _ _  σ' h =>
     have hl' := pushOp?_hold_length_le_output_length h <| by simp_all +arith
     have := push?_output_hold_length h hl
-    simp only [run?_output_length hr hl', pushArg, List.length_cons] at *
+    simp only [run?_output_length hr hl'] at *
     omega
   · injection hr with hr
     rw [← hr, finalize_output_length]
@@ -288,24 +340,23 @@ termination_by σ₁.ops.length
 end ParseState
 
 -- Parses an expression based on the "shunting yard algorithm".
-def parse? (ops : List Op) (args : List Nat) : Option Expr :=
-  match h : { ops, args : ParseState }.run? with
+def parse? : Parser := fun inp =>
+  match h : ParseState.run? inp with
   | none   => none
   | some σ => σ.output[0]'(by simp [ParseState.run?_output_singleton h])
 
-theorem parse?_some_to_run?_some (h : parse? ops args = some e) :
-    ∃ σ, { ops, args : ParseState }.run? = some σ := by
+theorem parse?_some_to_run?_some (h : parse? inp = some e) : ∃ σ, ParseState.run? inp = some σ := by
   rw [parse?] at h
   split at h <;> simp_all
 
-theorem parse?_some_iff : (∃ e, parse? ops args = some e) ↔ (args.length = ops.length + 1) where
+theorem parse?_some_iff : (∃ e, parse? inp = some e) ↔ (inp.args.length = inp.ops.length + 1) where
   mp h := parse?_some_to_run?_some h.choose_spec |>.choose_spec |> ParseState.run?_some
   mpr h := by
-    have := ParseState.run?_wf (σ₁ := { ops, args }) ⟨h, rfl⟩
+    have := ParseState.run?_wf (σ₁ := { inp with }) ⟨h, rfl⟩
     rw [parse?]
     split <;> simp_all
 
-theorem parse?_lits_eq_args (h : parse? ops args = some e) : e.lits = args := by
+theorem parse?_lits_eq_args (h : parse? inp = some e) : e.lits = inp.args := by
   rw [parse?] at h
   split at h
   · contradiction
@@ -316,20 +367,23 @@ theorem parse?_lits_eq_args (h : parse? ops args = some e) : e.lits = args := by
     have := ParseState.run?_lits hr
     simp_all [ParseState.lits, ParseState.lits.go]
 
-theorem parse?_apps_eq_ops (h : parse? ops args = some e) : e.apps = ops := by
+theorem parse?_apps_eq_ops (h : parse? inp = some e) : e.apps = inp.ops := by
   sorry
 
-def parse (ops : List Op) (args : List Nat) (h : args.length = ops.length + 1) : Expr :=
-  (parse? ops args).get <| Option.isSome_iff_exists.mpr <| parse?_some_iff.mpr h
+theorem parse?_some_equiv (h : parse? inp = some e) : e ≈ inp where
+  ops    := parse?_apps_eq_ops h
+  args   := parse?_lits_eq_args h
+  lawful := sorry
 
-theorem parse?_eq_parse (h : parse? ops args = some e) :
-    e = parse ops args (parse?_some_iff.mp ⟨e, h⟩) := by
-  simp [parse, h]
+theorem parse?_correct : Parser.Correct parse? where
+  success := Expr.parse?_some_iff
+  equiv   := Expr.parse?_some_equiv
 
 end Expr
 
-def doAlgebra (ops : List Op) (args : List Nat) (h : args.length = ops.length + 1 := by decide) :=
-  Expr.parse ops args h |>.eval
+def doAlgebra (ops : List Op) (args : List Nat) (h : args.length = ops.length + 1 := by decide) : Nat :=
+  have h := Option.isSome_iff_exists.mpr <| Expr.parse?_some_iff.mpr h
+  Expr.parse? { ops, args } |>.get h |>.eval
 
 example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = 9  := by native_decide
 example : doAlgebra [.pow, .mul, .add] [2, 3, 4, 5] = 37 := by native_decide
diff --git a/lean-toolchain b/lean-toolchain
index b2153cd..6ac6d4c 100644
--- a/lean-toolchain
+++ b/lean-toolchain
@@ -1 +1 @@
-leanprover/lean4:v4.18.0
+leanprover/lean4:v4.22.0

From db567d7828fce577a149babfa07b5e47aa2684f1 Mon Sep 17 00:00:00 2001
From: Marcus Rossel <marcusrossel@icloud.com>
Date: Fri, 22 Aug 2025 23:30:48 +0200
Subject: [PATCH 12/12] Progress on proving lawfulness

---
 HumanEvalLean/HumanEval160.lean | 117 ++++++++++++++++++++++++--------
 1 file changed, 88 insertions(+), 29 deletions(-)

diff --git a/HumanEvalLean/HumanEval160.lean b/HumanEvalLean/HumanEval160.lean
index d78a245..1868799 100644
--- a/HumanEvalLean/HumanEval160.lean
+++ b/HumanEvalLean/HumanEval160.lean
@@ -13,6 +13,10 @@ structure Input where
   ops  : List Op  := []
   args : List Nat := []
 
+@[simp]
+abbrev Input.Wellformed (inp : Input) : Prop :=
+  inp.args.length = inp.ops.length + 1
+
 abbrev Priority := Nat
 
 namespace Op
@@ -39,12 +43,10 @@ inductive Side where
 -- this function indicates whether `op`'s parsing should occur after that of `other`. For example,
 -- if `op` is `+` and `other` appears to the `.right` with a priority of `3`, then `other` should be
 -- parsed before `op`. If however `other` had a priority of `1`, then `op` should be parsed before.
-def after (op : Op) (other : Priority) (side : Side := .right) : Bool :=
+def after (op : Op) (other : Priority) (side : Side) : Bool :=
   match op.assoc, side with
-  | .left,  .left  => op.prio ≤ other
-  | .left,  .right => op.prio < other
-  | .right, .left  => op.prio < other
-  | .right, .right => op.prio ≤ other
+  | .left, .left  | .right, .right => op.prio ≤ other
+  | .left, .right | .right, .left  => op.prio < other
 
 def interpret : Op → (Nat → Nat → Nat)
   | add => (· + ·)
@@ -98,7 +100,7 @@ abbrev Parser := Input → Option Expr
 -- This defines what it means for a parser to be correct: It produces a result iff the input is
 -- wellformed (`success`), and when it produces a result it is equivalent to the input (`equiv`).
 structure Parser.Correct (par : Parser) where
-  success : (∃ e, par inp = some e) ↔ (inp.args.length = inp.ops.length + 1)
+  success : (∃ e, par inp = some e) ↔ inp.Wellformed
   equiv   : (par inp = some e) → e ≈ inp
 
 structure ParseState extends Input where
@@ -110,6 +112,9 @@ namespace ParseState
 instance : Coe Input ParseState where
   coe inp := { inp with }
 
+instance : Coe ParseState Input where
+  coe := toInput
+
 def lits (σ : ParseState) : List Nat :=
   go σ.output
 where
@@ -124,7 +129,7 @@ macro_rules
   | `(tactic| decreasing_tactic) => `(tactic| simp +arith [*])
 
 structure Wellformed (σ : ParseState) where
-  external : σ.args.length = σ.ops.length + 1
+  external : Input.Wellformed σ
   internal : σ.hold.length = σ.output.length
 
 theorem Wellformed.of_external_cons
@@ -133,15 +138,27 @@ theorem Wellformed.of_external_cons
   external := have ⟨_, _⟩ := wf; by simp_all
   internal := wf.internal
 
+structure Lawful (σ : ParseState) where
+  output : ∀ e ∈ σ.output, e.Lawful
+  prios  : ∀ {o os e₁ e₂ es}, (σ.hold = o :: os) → (σ.output = e₂ :: e₁ :: es) →
+            (o.after e₁.prio .left) ∧ (o.after e₂.prio .right)
+
+theorem _root_.Input.lawful (inp : Input) : Lawful inp where
+  output := nofun
+  prios  := nofun
+
 @[simp]
 def pushArg (σ : ParseState) (arg : Nat) : ParseState :=
   { σ with output := (lit arg) :: σ.output }
 
+theorem pushArg_lawful (law : Lawful σ) (arg) : Lawful (σ.pushArg arg) := by
+  sorry
+
 def pushOp? (σ : ParseState) (op : Op) : Option ParseState :=
   match _ : σ.hold with
   | [] => some { σ with hold := [op] }
   | top :: hold =>
-    match top.after op.prio, σ.output with
+    match top.after op.prio .right, σ.output with
     | true,  _                   => some { σ with hold := op :: top :: hold }
     | false, arg₂ :: arg₁ :: out => pushOp? { σ with hold, output := app top arg₁ arg₂ :: out } op
     | false, _                   => none
@@ -200,6 +217,28 @@ theorem pushOp?_some {σ₁ : ParseState} (h : σ₁.hold.length < σ₁.output.
   all_goals simp_all
 termination_by σ₁.hold
 
+theorem pushOp?_lawful (law : Lawful σ₁) (h : σ₁.pushOp? op = some σ₂) : Lawful σ₂ := by
+  constructor
+  case output =>
+    intro e he
+    pushOp?_cases h
+    · rw [← h] at he; exact law.output _ he
+    · rw [← h] at he; exact law.output _ he
+    case _ top hold hh _ _ e₂ e₁ out ha ho =>
+      refine pushOp?_lawful ⟨?_, ?_⟩ h |>.output _ he
+      · have := law.prios hh ho
+        grind [Lawful, Expr.Lawful]
+      · simp
+        intro o os e₁ e₂ es ho ha he
+        simp_all
+        sorry
+  case prios =>
+    pushOp?_cases h
+    · simp; sorry
+    · simp; sorry
+    · sorry
+termination_by σ₁.hold
+
 def push? (σ : ParseState) (arg : Nat) (op : Op) : Option ParseState :=
   σ.pushArg arg |>.pushOp? op
 
@@ -235,11 +274,16 @@ theorem push?_output_length_eq_hold_length
   have := pushOp?_hold_length_le_output_length hp (by simp_all)
   omega
 
+theorem push?_lawful (law : Lawful σ₁) (h : σ₁.push? arg op = some σ₂) : Lawful σ₂ :=
+  pushOp?_lawful (pushArg_lawful law arg) h
+
 theorem push?_wf (wf : Wellformed σ₁) (arg op) : ∃ σ₂, σ₁.push? arg op = some σ₂ :=
   pushOp?_some <| by simp [wf.internal]
 
 theorem Wellformed.push? (hp : σ₁.push? arg op = some σ₂) (wf : Wellformed σ₁) : Wellformed σ₂ where
-  external := push?_args hp ▸ push?_ops hp ▸ wf.external
+  external := by
+    have ⟨ex, _⟩ := wf
+    simp_all [push?_args hp, push?_ops hp]
   internal := push?_output_length_eq_hold_length hp wf.internal
 
 def finalize (σ : ParseState) : ParseState :=
@@ -264,6 +308,9 @@ theorem finalize_output_length {σ : ParseState} (h : σ.hold.length < σ.output
   case cons.cons out => cases out <;> simp_all
   all_goals simp_all
 
+theorem finalize_lawful (law : Lawful σ) : Lawful σ.finalize := by
+  sorry
+
 def run? (σ : ParseState) : Option ParseState :=
   match _ : σ.ops, σ.args with
   | op :: ops, arg :: args =>
@@ -311,6 +358,11 @@ termination_by σ₁.ops.length
 theorem run?_output_singleton {ops args} (h : run? { ops, args } = some σ) : σ.output.length = 1 :=
   run?_output_length h .refl
 
+theorem run?_lawful (law : Lawful σ₁) (hr : σ₁.run? = some σ₂) : Lawful σ₂ := by
+  run?_cases hr
+  · sorry
+  · sorry
+
 theorem run?_wf (wf : Wellformed σ₁) : ∃ σ₂, σ₁.run? = some σ₂ := by
   rw [run?]
   repeat split
@@ -326,8 +378,7 @@ theorem run?_wf (wf : Wellformed σ₁) : ∃ σ₂, σ₁.run? = some σ₂ :=
     all_goals simp_all
 termination_by σ₁.ops.length
 
-theorem run?_some {σ₁ : ParseState} (hr : σ₁.run? = some σ₂) :
-    σ₁.args.length = σ₁.ops.length + 1 := by
+theorem run?_some {σ₁ : ParseState} (hr : σ₁.run? = some σ₂) : Input.Wellformed σ₁ := by
   run?_cases hr
   next ops _ _ _ _ σ₂ h =>
     have := run?_some hr
@@ -345,35 +396,43 @@ def parse? : Parser := fun inp =>
   | none   => none
   | some σ => σ.output[0]'(by simp [ParseState.run?_output_singleton h])
 
-theorem parse?_some_to_run?_some (h : parse? inp = some e) : ∃ σ, ParseState.run? inp = some σ := by
+theorem parse?_some_to_run?_some (h : parse? inp = some e) :
+    ∃ σ, ParseState.run? inp = some σ ∧ σ.output = [e] := by
   rw [parse?] at h
-  split at h <;> simp_all
+  split at h
+  · contradiction
+  next σ hr =>
+    simp_all only [Option.some.injEq, exists_eq_left']
+    have hs := ParseState.run?_output_singleton hr
+    exact h ▸ List.length_one_eq_getElem_zero hs
 
-theorem parse?_some_iff : (∃ e, parse? inp = some e) ↔ (inp.args.length = inp.ops.length + 1) where
-  mp h := parse?_some_to_run?_some h.choose_spec |>.choose_spec |> ParseState.run?_some
+theorem parse?_some_iff : (∃ e, parse? inp = some e) ↔ inp.Wellformed where
+  mp h := by
+    have ⟨_, h, _⟩ := parse?_some_to_run?_some h.choose_spec
+    exact ParseState.run?_some h
   mpr h := by
-    have := ParseState.run?_wf (σ₁ := { inp with }) ⟨h, rfl⟩
+    have := @ParseState.run?_wf inp ⟨h, rfl⟩
     rw [parse?]
     split <;> simp_all
 
 theorem parse?_lits_eq_args (h : parse? inp = some e) : e.lits = inp.args := by
-  rw [parse?] at h
-  split at h
-  · contradiction
-  next σ hr =>
-    injection h with h
-    have hs := ParseState.run?_output_singleton hr
-    have : σ.output = [e] := h ▸ List.length_one_eq_getElem_zero hs
-    have := ParseState.run?_lits hr
-    simp_all [ParseState.lits, ParseState.lits.go]
+  have ⟨_, h, _⟩ := parse?_some_to_run?_some h
+  have := ParseState.run?_lits h
+  simp_all [ParseState.lits, ParseState.lits.go]
 
 theorem parse?_apps_eq_ops (h : parse? inp = some e) : e.apps = inp.ops := by
+  have ⟨σ, h, ho⟩ := parse?_some_to_run?_some h
   sorry
 
+theorem parse?_lawful (h : parse? inp = some e) : Lawful e := by
+  have ⟨_, h, _⟩ := parse?_some_to_run?_some h
+  have := ParseState.run?_lawful inp.lawful h
+  grind [ParseState.Lawful, Expr.Lawful]
+
 theorem parse?_some_equiv (h : parse? inp = some e) : e ≈ inp where
   ops    := parse?_apps_eq_ops h
   args   := parse?_lits_eq_args h
-  lawful := sorry
+  lawful := parse?_lawful h
 
 theorem parse?_correct : Parser.Correct parse? where
   success := Expr.parse?_some_iff
@@ -381,9 +440,9 @@ theorem parse?_correct : Parser.Correct parse? where
 
 end Expr
 
-def doAlgebra (ops : List Op) (args : List Nat) (h : args.length = ops.length + 1 := by decide) : Nat :=
-  have h := Option.isSome_iff_exists.mpr <| Expr.parse?_some_iff.mpr h
-  Expr.parse? { ops, args } |>.get h |>.eval
+def doAlgebra (ops : List Op) (args : List Nat) (h : Input.Wellformed { ops, args } := by decide) : Nat :=
+  have parse?_isSome := Option.isSome_iff_exists.mpr <| Expr.parse?_some_iff.mpr h
+  Expr.parse? { ops, args } |>.get parse?_isSome |>.eval
 
 example : doAlgebra [.add, .mul, .sub] [2, 3, 4, 5] = 9  := by native_decide
 example : doAlgebra [.pow, .mul, .add] [2, 3, 4, 5] = 37 := by native_decide