forked from megagonlabs/doduo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
59 lines (41 loc) · 1.35 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import argparse
import os
from collections import defaultdict
import json
import pandas as pd
from doduo.doduo import Doduo
def write_json(path, filename, result):
    """Write ``result`` as pretty-printed JSON into the directory ``path``.

    The output file is named after ``filename`` with its extension replaced
    by ``.json`` (for example ``table.csv`` -> ``table.json``).

    Args:
        path: Directory that receives the file; created, including parents,
            when it does not exist yet.
        filename: Name of the input file the result belongs to.
        result: JSON-serializable object to dump.
    """
    # exist_ok=True avoids the race between a separate exists() check and
    # the directory creation.
    os.makedirs(path, exist_ok=True)
    # splitext copes with extensions of any length; slicing off the last
    # three characters only worked for three-letter extensions.
    stem, _ = os.path.splitext(filename)
    target = os.path.join(path, stem + ".json")
    with open(target, "w", encoding="utf-8") as file:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(result, file, indent=4, ensure_ascii=False)
def read_tables(source):
    """Return the names of the regular files directly inside ``source``.

    Args:
        source: Path of the directory to list.

    Returns:
        A sorted list of file names (names only, not full paths).
        Sub-directories are skipped so callers only receive entries that
        can be opened as files.
    """
    with os.scandir(source) as entries:
        # os.scandir yields entries in arbitrary order; sorting makes the
        # processing order reproducible between runs.
        return sorted(entry.name for entry in entries if entry.is_file())
def write_total_score(path, result):
    """Append the text ``result`` to the file at ``path``.

    Args:
        path: Path of the text file to append to; the file and any missing
            parent directories are created on demand.
        result: String to append verbatim (no newline is added).
    """
    # Only the parent directory may be created here: calling makedirs on
    # ``path`` itself would create a directory with the file's name and make
    # the open() below fail.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "a", encoding="utf-8") as file:
        file.write(result)
# Directory whose entries are listed as input tables.
SOURCE_PATH = './uploads/'
# Directory that receives one JSON result file per input table.
RESULT_PATH = './result/'
# Text file that collects one summary line per input table.
TOTAL_SCORE_PATH = "./total_score.txt"

files = read_tables(SOURCE_PATH)

# Use a Namespace *instance*: assigning to the bare class would attach
# ``model`` to argparse.Namespace itself for the whole process.
# NOTE(review): the original comment names "wikitable" as the alternative
# model value - confirm against the Doduo documentation.
args = argparse.Namespace(model="viznet")
doduo = Doduo(args)
# Annotate every input table and persist the predictions twice: as a
# per-table JSON mapping and as one summary line in the shared score file.
for file in files:
    df = pd.read_csv(os.path.join(SOURCE_PATH, file))
    res = doduo.annotate_columns(df)
    # Map each column header to the type reported at the same position.
    result = dict(zip(df.columns, res.coltypes))
    # Every type is prefixed with a space, so the summary line keeps the
    # exact layout "<file>:  <type> <type> ...".
    str_type = "".join(" " + coltype for coltype in res.coltypes)
    write_total_score(TOTAL_SCORE_PATH, file + ": " + str_type + "\n")
    write_json(RESULT_PATH, file, result)