-
Notifications
You must be signed in to change notification settings - Fork 0
/
readContractPairs.py
52 lines (37 loc) · 1.62 KB
/
readContractPairs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import argparse
import sys
from util.util import checkToSkip,Progbar
from util.constant import ROOT_PATH
def main(option):
    """Copy the well-formed ``<->`` lines of a ``.contradict`` file to a new file.

    The input file is
    ``<rootpath>/<trainCollection>/TextData/concept/
    concept_frequency_count_gt<threshold>.<concept_bank>.txt.contradict``.
    Every input line containing exactly one ``<->`` separator is written,
    unchanged and in its original order, to a sibling file whose name is the
    input path plus the suffix ``.contradict_pairs``. All other lines are
    dropped.

    Args:
        option: Parsed options object providing the attributes ``rootpath``,
            ``trainCollection``, ``threshold``, ``concept_bank`` and
            ``overwrite``.

    Raises:
        SystemExit: With status 0 when ``checkToSkip(savename,
            option.overwrite)`` returns a truthy value.
    """
    # Read every setting from the ``option`` argument (never from a
    # module-level global) so the function also works when it is imported
    # and called by other code.
    contract_file = os.path.join(
        option.rootpath, option.trainCollection, 'TextData', 'concept',
        'concept_frequency_count_gt{}.{}.txt.contradict'.format(
            option.threshold, option.concept_bank))
    savename = contract_file + '.contradict_pairs'

    if checkToSkip(savename, option.overwrite):
        sys.exit(0)

    with open(contract_file, 'r', encoding='utf-8') as reader:
        in_lines = reader.readlines()

    pbar = Progbar(len(in_lines))
    out_lines = []
    for in_line in in_lines:
        # Exactly one separator means the line splits into two parts.
        if in_line.count('<->') == 1:
            out_lines.append(in_line)
        # Progress advances for every input line, kept or not.
        pbar.add(1)

    with open(savename, 'w', encoding='utf-8') as writer:
        writer.writelines(out_lines)
if __name__ == '__main__':
    # Script entry point: declare the command-line interface, parse it, and
    # pass the resulting namespace to main().
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--rootpath',
        type=str,
        default=ROOT_PATH,
        help='path to datasets',
    )
    arg_parser.add_argument(
        'trainCollection',
        type=str,
        help='train collection',
    )
    arg_parser.add_argument(
        '--overwrite',
        type=int,
        default=0,
        choices=[0, 1],
        help='overwrite existed file. (default: 0)',
    )
    arg_parser.add_argument(
        '--concept_bank',
        type=str,
        default='concept_word',
        help='concept_bank filename',
    )
    arg_parser.add_argument(
        '--threshold',
        type=int,
        default=5,
        help='concept frequence threshold',
    )

    opt = arg_parser.parse_args()
    main(opt)