-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpbt_recog_bon.py
118 lines (101 loc) · 3.66 KB
/
pbt_recog_bon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import asyncio
import time
import traceback
import sys
import jsonlines
from typing import List, Dict, Any
from dataclasses import dataclass
import copy
from LeanTool.leantool import interactive_lean_check, models
import tester
TEMP=0.7
def generate_recog_prompt(problem: Dict[str,Any]) -> str:
return f"""You are given a coding problem description, and a formal specification of the requirements in Lean 4.
You are also given a candidate solution to the problem,
and outputs from a property-based testing procedure that checks the output of the candidate solution given random inputs against the specifications.
If the property-based testing indicates there are failed test cases, determine why the tests failed, and try to fix the errors in the candidate
solution.
Problem Description:
```
{problem['description']}
```
The Lean 4 function should have the following signature:
```
{problem['function_signature']}
```
The formal specification:
```
{problem.get('property_def', '')}
{problem['theorem_signature']}
{problem.get('theorem2_signature', '')}
```
The candidate solution:
```
{problem['code_solution']}
```
Outputs from the property-based testing procedure, indicating the input-output values from the candidate solution that failed to satisfy the specification:
{problem['pbt_results']}
You are encouraged to think step by step. You do not need to prove the theorem statements.
Use the provided tool to further test your modified Lean code on the input values that the original solution failed on.
Iterate until your code produces correct outputs for those inputs.
You may further test your code on other inputs as well.
"""
async def solve_recog (problem: Dict[str, Any], model='sonnet', max_attempts=10):
prompt = generate_recog_prompt(problem)
res=await interactive_lean_check(prompt, model=models[model], max_attempts=max_attempts,temperature=TEMP)
print (res)
out=''
for att in res['attempts']:
out+='\nAttempt:\n'
if 'thought' in att: out+=str(att.get('thought',''))+'\n'
out+=att.get('code','')+'\n'
out+=str(att.get('result',''))
if out=='': out=res['messages'][-1]['content']
print (out)
ret={'output':out}
if 'final_code' in res:
lt=tester.LeanTester()
_,output,stats=lt.test_solution(res['final_code'], problem)
print(output)
print(stats)
ret['output']+='\n'+ res['final_code']
ret['tests_passed']=stats['passed']
ret['tests_total']=stats['total']
return ret
async def main():
with jsonlines.open(sys.argv[1]) as f:
with jsonlines.open(sys.argv[2], mode='w') as fout:
for jo in f:
await asyncio.sleep(1)
if len(sys.argv)>3:
model=sys.argv[3]
else:
model='sonnet'
print ('using model', model)
if len(sys.argv)>4:
max_att=sys.argv[4]
else:
max_att=3
inp_jo=copy.deepcopy(jo)
if 'statement' in jo:
inp_jo['description']=jo['statement']
else:
inp_jo['description']=jo['description']
#print (json.dumps(inp_jo,indent=4))
try:
for attempts in range(max_att):
solution=await solve_recog(inp_jo, model=model)
if solution.get('tests_total',0)>0 and solution['tests_passed']==solution['tests_total']:
break
out_jo=copy.deepcopy(inp_jo)
out_jo['recog_solution']=solution
except Exception as e:
print ('Error:')
print (e)
traceback.print_exc()
continue
if not out_jo: continue
#print(json.dumps(out_jo,indent=4))
fout.write(out_jo)
if __name__=='__main__':
asyncio.run(main())