-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
157 lines (118 loc) · 5.29 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os
from os.path import join
from quart import Quart
import traceback
import config
import inc.api
import inc.horizontal.api_categories
import inc.horizontal.api_categories_2_descriptions
import inc.entities.api_categories_2_entities
import inc.horizontal.api_categories_subclasses
import inc.horizontal.api_categories_subclasses_2_descriptions
import inc.entities.api_categories_subclasses_2_entities
import inc.api_categories_gt_manager
import inc.api_classes
import inc.api_types
import inc.api_redirects
import inc.disambiguation
from util import util_log, util_count, util_split, util_entity_split
# Initialise the project logger with its log file name, then record which
# benchmark and categories file this process was configured with.
util_log.init("wikidata2tables_generator.log")
util_log.info(f"Creating benchmark: {config.BENCHMARK_NAME} ")
util_log.info(f"Categories are: {config.CATEGORIES_FILE_NAME}")

# The Quart application every route below is registered on; debug stays off.
app = Quart(__name__)
app.debug = False
@app.route('/test')
async def routeTest():
    """Scratch endpoint for trying out an API call; currently returns an empty dict."""
    # TODO: Add any API you want to test here!
    return {}
def _archive_processed_entities(archived_name):
    """Rename ``processed_entities.txt`` in ``config.HORIZONTAL_PATH`` to ``archived_name``."""
    # os.replace overwrites an existing target on every platform, so running
    # the generation a second time does not fail on a leftover file.
    os.replace(join(config.HORIZONTAL_PATH, 'processed_entities.txt'),
               join(config.HORIZONTAL_PATH, archived_name))


async def generate_horizontal_tables():
    """
    Generates the Horizontal Tables in three passes: properties only,
    properties + long descriptions, and descriptions only.

    After each pass the ``processed_entities.txt`` file found in
    ``config.HORIZONTAL_PATH`` is renamed so the passes do not share it
    (presumably it is written by the ``parse_categories`` calls -- confirm
    in the ``inc.horizontal`` modules).

    The second pass switches on the module-level ``include_categories_desc``
    flag of both category parsers. The flags are put back to their previous
    values before returning, so calling this function again in the same
    process starts with a genuine "props only" pass.

    Returns the result of the last ``parse_categories`` call.
    """
    categories_api = inc.horizontal.api_categories
    subclasses_api = inc.horizontal.api_categories_subclasses

    # Remember the current flag values so they can be restored on the way out.
    saved_flags = [
        (module, getattr(module, 'include_categories_desc', False))
        for module in (categories_api, subclasses_api)
    ]

    try:
        # save horizontal tables (WITHOUT) long descriptions (Props ONLY)
        util_log.info(message='[Horizontal][PROPS Only] Categories to instances and their instances')
        res = await categories_api.parse_categories()

        util_log.info(message='[Horizontal][PROPS Only] Categories to instances and their subclasses+instances')
        res = await subclasses_api.parse_categories()

        _archive_processed_entities('processed_entities_props_only.txt')

        # save horizontal tables PROPS + long DESCRIPTIONS
        util_log.info(message='[Horizontal][Props+Des] Categories to instances and their instances')
        categories_api.include_categories_desc = True
        res = await categories_api.parse_categories()

        util_log.info(message='[Horizontal][Props+Des] Categories to instances and their subclasses+instances')
        subclasses_api.include_categories_desc = True
        res = await subclasses_api.parse_categories()

        _archive_processed_entities('processed_entities_props+descriptions.txt')

        # save horizontal DESCRIPTIONS only
        util_log.info(message='[Horizontal][DESC Only] Categories to instances and their instances')
        res = await inc.horizontal.api_categories_2_descriptions.parse_categories()

        util_log.info(message='[Horizontal][DESC Only] Categories to instances and their subclasses+instances')
        res = await inc.horizontal.api_categories_subclasses_2_descriptions.parse_categories()

        _archive_processed_entities('processed_entities_descriptions_only.txt')
    finally:
        # Undo the flag changes even when a pass fails, so the shared module
        # state does not leak into the next invocation.
        for module, previous_value in saved_flags:
            module.include_categories_desc = previous_value

    return res
async def generate_entity_tables():
    """
    Generates the Entity Tables from given categories.csv

    Runs the instance-based parser first, then the subclass-based parser,
    and returns the result of the second one.
    """
    # entity tables built from the categories' instances
    util_log.info(message='[Entities] Categories to instances and their instances')
    await inc.entities.api_categories_2_entities.parse_categories()

    # entity tables built from the categories' subclasses
    util_log.info(message='[Entities] Categories to Subclasses and their subclasses+instances')
    return await inc.entities.api_categories_subclasses_2_entities.parse_categories()
@app.route('/generate_tables', methods=['GET'])
async def routeGenerate_tables():
    """Build the entity tables, then the horizontal tables, and report success."""
    # Order matters only in that it mirrors the original: entities first.
    for build_step in (generate_entity_tables, generate_horizontal_tables):
        await build_step()
    return {'message': 'Success: Horizontal and Entities Tables are fetched.'}
@app.route('/anonymize_tables', methods=['GET'])
async def routeAnonymize_tables():
    """
    Apply SemTab format to the generated tables:
    separate the actual tables from `gt` and `targets`, and anonymize the table names.
    """
    anonymize = inc.api_categories_gt_manager.anonymize_files
    await anonymize()
    response = {'Message': 'Success, Tables are anonmized'}
    return response
@app.route('/count', methods=['GET'])
async def routeCount():
    """Return whatever `util_count.count_everything()` produces as the response."""
    return await util_count.count_everything()
@app.route('/val_test_split', methods=['GET'])
async def routValTestSplit():
    """
    Create the val/test splits for each table type.
    """
    # Horizontal tables are split first, entity tables second.
    for splitter in (util_split, util_entity_split):
        splitter.split()
    return {'message': 'success, validation and test splits are created.'}
@app.route('/generate_benchmark_at_once', methods=['GET'])
async def routeGenerate_benchmark_at_once():
    """
    Main entry point to construct a domain-specific benchmark.

    Chains the individual route handlers in order; only the response of the
    final step is kept, with its message replaced by an overall success note.
    """
    # 1. Create the benchmark tables
    await routeGenerate_tables()

    # 2. Anonymize the file name (should be backward traceable)
    await routeAnonymize_tables()

    # 3. Provides some statistics about the generated dataset
    await routeCount()

    # 4. val/test split creation
    response = await routValTestSplit()

    response['message'] = 'Success: benchmark has been created'
    return response
# ~~~~~~~~~~~~~~~~~~~~~~ Default ~~~~~~~~~~~~~~~~~~~~~~
@app.errorhandler(500)
def handle_500(e):
    """
    Output the internal error stack in case of an unhandled exception.

    Returns a ``(body, status)`` tuple: the formatted traceback as text and
    the HTTP status code 500.
    """
    # NOTE(review): Flask-style frameworks pass the 500 handler an
    # InternalServerError wrapper and expose the real failure as
    # `original_exception`; fall back to `e` itself when that attribute is
    # missing or empty -- confirm against the installed Quart version.
    cause = getattr(e, 'original_exception', None) or e

    # Format the exception directly instead of re-raising it inside a bare
    # `except:`, which would also trap SystemExit/KeyboardInterrupt and add
    # this handler's own frame to the reported stack.
    stack = ''.join(traceback.format_exception(type(cause), cause, cause.__traceback__))
    return stack, 500
@app.route('/')
def routeRoot():
    """Answer the root URL with the service identifier string."""
    service_name = 'wikidata2tables.generator.svc'
    return service_name
if __name__ == '__main__':
    # Start the server on host 0.0.0.0, port 5007 when run as a script.
    app.run(port=5007, host='0.0.0.0')