-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtoolkit.py
executable file
·206 lines (189 loc) · 8.38 KB
/
toolkit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
from flask import Flask
from flask import render_template
from flask import request
from threading import Thread
from capstone.store import *
from capstone.config import *
from capstone.util import *
from capstone.rank import *
import webbrowser
import subprocess
import time
import numpy
import os
from flask_cors import CORS
# Address that open_browser() opens in the user's browser.
url = 'http://127.0.0.1:5000/'
app = Flask(__name__)
# Enable CORS on the app (presumably so the statically served pages on
# port 8080 can call the /searchapi route -- confirm).
CORS(app)
# Sentence data shared by every route; label() indexes it as
# dataset[doc_name][sentence_index].
dataset = load_from_file(file_format='json', filename=json_output_filename)
# Document and symbol of the most recent 'search' action in label(); the
# save actions of label() read them back.
current_doc_name = None
current_symbol_expr = None
symbol_detector = SymbolDetector(symlist_filename=symlist_filename)
# Page variants: home() links to '<doc>/<hint>.html' for each hint and
# augment_html_files() converts every HTML file once per hint.
hints = ['popover', 'floating']
@app.route('/', methods=['GET'])
def home():
    """Render the landing page with one menu entry per document and hint.

    Each entry carries a display name of the form ``<doc>(<hint>)`` and a
    link of the form ``http://localhost:8080/<doc>/<hint>.html``.  A request
    whose method is not GET is answered with the error page.
    """
    if request.method != 'GET':
        return render_template('error.html')

    menu_entries = [
        {
            'display_name': name + '(' + variant + ')',
            'url': 'http://localhost:8080/' + name + '/' + variant + '.html',
        }
        for name in dataset.keys()
        for variant in hints
    ]
    return render_template('home_page.html', doc_menu=menu_entries)
@app.route('/demo', methods=['POST', 'GET'])
def demo():
    """Serve the demo page and answer its symbol-search form.

    GET renders an empty ``demo.html``.  POST reads ``doc_name`` and
    ``symbol_expr`` from the form, filters the dataset with
    ``filter_by_doc_symbol`` and, on ``'success'``, ranks the matching
    sentences before rendering them.  ``'symbol_not_exist'`` renders an empty
    result list; any other outcome or HTTP method renders ``error.html``.
    """
    doc_list = dataset.keys()
    method = request.method

    if method == 'GET':
        return render_template('demo.html', sentence_list=[], doc_list=doc_list)
    if method != 'POST':
        return render_template('error.html')

    doc_name = request.form['doc_name']
    symbol_expr = request.form['symbol_expr']
    code, matches = filter_by_doc_symbol(
        dataset=dataset,
        symbol_detector=symbol_detector,
        doc_name=doc_name,
        symbol_expr=symbol_expr,
    )

    if code == 'success':
        query = {
            'doc_name': doc_name,
            'symbol_expr': symbol_expr,
            'sentences': matches,
        }
        shown = rank(query, algoName='ML', prob=True, threshold=0.45)
    elif code == 'symbol_not_exist':
        shown = []
    else:
        return render_template('error.html')

    return render_template(
        'demo.html',
        sentence_list=shown,
        doc_list=doc_list,
        doc_name=doc_name,
        symbol_expr=symbol_expr,
    )
@app.route('/searchapi/<string:doc_name>/<string:symbol_expr>', methods=['GET'])
def search_api(doc_name, symbol_expr):
    """Return the sentences of a document that mention a symbol, as HTML.

    Args:
        doc_name: Document key taken from the URL path.
        symbol_expr: Symbol expression taken from the URL path.

    Returns:
        The rendered ``results.html`` fragment when ``filter_by_doc_symbol``
        reports ``'success'``; otherwise the literal ``<p>Not found</p>``
        (sent with the default 200 status, which callers may rely on).
    """
    print(doc_name, symbol_expr)
    code, filtered_sentences = filter_by_doc_symbol(
        dataset=dataset,
        symbol_detector=symbol_detector,
        doc_name=doc_name,
        symbol_expr=symbol_expr,
    )
    if code != 'success':
        return "<p>Not found</p>"
    # Sentences are returned unranked: this endpoint does not apply the
    # rank() step that the /search and /demo pages use.
    return render_template('results.html', sentence_list=filtered_sentences)
@app.route('/search', methods=['POST', 'GET'])
def search():
    """Serve the search page and answer its symbol-search form.

    GET renders an empty ``search.html``.  POST reads ``doc_name`` and
    ``symbol_expr`` from the form, filters the dataset with
    ``filter_by_doc_symbol`` and, on ``'success'``, ranks the matching
    sentences before rendering them.  ``'symbol_not_exist'`` renders an empty
    result list; any other outcome or HTTP method renders ``error.html``.
    """
    doc_list = dataset.keys()
    method = request.method

    if method == 'GET':
        return render_template('search.html', sentence_list=[], doc_list=doc_list)
    if method != 'POST':
        return render_template('error.html')

    doc_name = request.form['doc_name']
    symbol_expr = request.form['symbol_expr']
    code, matches = filter_by_doc_symbol(
        dataset=dataset,
        symbol_detector=symbol_detector,
        doc_name=doc_name,
        symbol_expr=symbol_expr,
    )

    if code == 'success':
        query = {
            'doc_name': doc_name,
            'symbol_expr': symbol_expr,
            'sentences': matches,
        }
        shown = rank(query, algoName='ML', prob=True, threshold=0.45)
    elif code == 'symbol_not_exist':
        shown = []
    else:
        return render_template('error.html')

    return render_template(
        'search.html',
        sentence_list=shown,
        doc_list=doc_list,
        doc_name=doc_name,
        symbol_expr=symbol_expr,
    )
def _submitted_labels():
    """Return ``(sentence_index, label)`` pairs posted by the labelling form.

    Every form field except ``btn_name`` is read as a sentence index mapped
    to its label.  ``request.form.items()`` yields the first value of each
    field; the previous ``dict(request.form)`` followed by ``value[0]`` only
    produced the full label when the values were lists and would otherwise
    take the first character (believed to vary with the Werkzeug version --
    confirm against the deployed one).

    Raises:
        ValueError: if a field name is not an integer.  All fields are parsed
            before any label is applied, so a bad field changes nothing.
    """
    return [
        (int(field), value)
        for field, value in request.form.items()
        if field != 'btn_name'
    ]


def _apply_labels(doc_name, labels):
    """Set ``'label'`` on the indexed sentences of ``dataset[doc_name]``.

    Returns the labelled sentence objects in submission order.
    """
    sentences = dataset[doc_name]
    labelled = []
    for idx, value in labels:
        sentence = sentences[idx]
        sentence['label'] = value
        labelled.append(sentence)
    return labelled


def _save_separate(doc_name, symbol_expr, sentences):
    """Write the labelled sentences of one (document, symbol) pair to a file."""
    output_data = {
        'doc_name': doc_name,
        'symbol_expr': symbol_expr,
        'sentences': sentences,
    }
    # NOTE(review): only backslashes are stripped; a symbol containing '/'
    # or '..' still ends up in the path unchanged -- consider sanitising.
    symbol_field = symbol_expr.replace('\\', '')
    output_filename = output_root + '/' + doc_name + '_' + symbol_field + '.json'
    save_to_file(file_format='json', filename=output_filename, data=output_data)


@app.route('/label', methods=['POST', 'GET'])
def label():
    """Serve the labelling tool and process its form actions.

    GET renders an empty ``label_tools.html``.  POST dispatches on the
    ``btn_name`` form field:

    * ``search`` -- remember the document and symbol in the module globals
      and render the matching sentences (empty on ``'symbol_not_exist'``).
    * ``save_overall`` -- apply the submitted labels and rewrite the main
      dataset file.
    * ``save_separate`` -- apply the submitted labels and write them to a
      per-(document, symbol) file under ``output_root``.
    * ``save_both`` -- do both, main dataset file first.

    Anything else renders ``error.html``.  That now includes a save action
    submitted before a successful search has selected a known document,
    which previously raised ``KeyError``.
    """
    global current_doc_name
    global current_symbol_expr

    if request.method == 'GET':
        return render_template('label_tools.html', sentence_list=[])
    if request.method != 'POST':
        return render_template('error.html')

    action_type = request.form['btn_name']

    if action_type == 'search':
        # Read both fields before touching the globals so a missing field
        # cannot leave the remembered search half-updated.
        doc_name = request.form['doc_name']
        symbol_expr = request.form['symbol_expr']
        current_doc_name = doc_name
        current_symbol_expr = symbol_expr
        code, filtered_sentences = filter_by_doc_symbol(
            dataset=dataset,
            symbol_detector=symbol_detector,
            doc_name=doc_name,
            symbol_expr=symbol_expr,
        )
        if code == 'success':
            return render_template('label_tools.html', sentence_list=filtered_sentences)
        if code == 'symbol_not_exist':
            return render_template('label_tools.html', sentence_list=[])
        return render_template('error.html')

    if action_type in ('save_overall', 'save_separate', 'save_both'):
        if current_doc_name not in dataset:
            return render_template('error.html')
        labelled = _apply_labels(current_doc_name, _submitted_labels())
        if action_type in ('save_overall', 'save_both'):
            save_to_file(file_format='json', filename=json_output_filename, data=dataset)
        if action_type in ('save_separate', 'save_both'):
            _save_separate(current_doc_name, current_symbol_expr, labelled)
        return render_template('label_tools.html', sentence_list=[])

    return render_template('error.html')
def open_browser():
    """Announce the launch, pause three seconds, then open ``url``.

    The pause presumably gives the Flask server time to start listening
    before the page is requested -- confirm.
    """
    startup_delay = 3
    print('Starting browser ...')
    time.sleep(startup_delay)
    webbrowser.open_new(url)
def start_static_server():
    """Serve ``data/html_files`` over HTTP on port 8080 in a child process.

    The child is started with ``cwd`` set to the directory instead of
    changing this process's working directory and changing it back, so a
    failed launch can no longer leave the caller in the wrong directory.

    Returns:
        subprocess.Popen: Handle of the ``http.server`` child, so the caller
        can terminate it.  Existing callers that ignore the return value are
        unaffected.

    Raises:
        OSError: If ``data/html_files`` does not exist under the current
            working directory or the interpreter cannot be launched.
    """
    import sys  # local import: only this function needs it

    html_root = os.path.join(os.getcwd(), 'data/html_files')
    # Use the running interpreter; a bare 'python' may be absent from PATH
    # or resolve to a different installation.
    interpreter = sys.executable or 'python'
    return subprocess.Popen(
        [interpreter, '-m', 'http.server', '8080'],
        cwd=html_root,
        env=os.environ,
    )
def augment_html_files():
    """Convert every HTML file found under ``data`` once per hint.

    ``list_tex_dest_paths('data')`` returns five parallel sequences; only
    the HTML paths and the TeX names are used here.  A progress line is
    printed before each ``convert_html_file`` call.
    """
    _tex_files, _html_files, _tex_paths, html_paths, tex_names = list_tex_dest_paths('data')
    for page_path, source_name in zip(html_paths, tex_names):
        for variant in hints:
            print('converting ', variant, ' version for ', source_name, '...')
            convert_html_file(page_path, variant)
def main():
    """Prepare the HTML pages, start both servers and open the browser.

    Order matters: the pages are converted first, the static file server is
    started next, the browser launch is handed to a background thread, and
    only then does the blocking ``app.run()`` take over this thread.
    """
    augment_html_files()
    start_static_server()
    Thread(target=open_browser).start()
    app.run()


if __name__ == '__main__':
    main()