-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutil.py
52 lines (40 loc) · 1.29 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import pickle
import os
def read_txt(filename):
    """Return the lines of a UTF-8 encoded file as a list of strings.

    The file is read in binary mode and each line is decoded as UTF-8,
    then stripped of leading and trailing whitespace (including the line
    terminator). Blank lines are kept as empty strings.
    """
    with open(filename, 'rb') as stream:
        return [raw.decode('utf-8').strip() for raw in stream]
def read_txt_to_dict(filename):
    """Build a mapping from each line of a text file to its line index.

    Lines are loaded with ``read_txt``; the value for a line is its
    zero-based position in the file. If the same line occurs more than
    once, the index of its last occurrence is kept.
    """
    return {label: index for index, label in enumerate(read_txt(filename))}
def save_to_txt(filename, content):
    """Write an iterable of strings to a UTF-8 text file, one per line.

    Items are separated by a newline; no newline is written after the
    last item. An existing file is overwritten.
    """
    with open(filename, mode='w', encoding='utf-8') as out:
        joined = '\n'.join(content)
        out.write(joined)
def save_to_pickle(filename, content):
    """Pickle ``content`` and write it to ``filename``.

    Any missing parent directories of ``filename`` are created first.
    An existing file is overwritten.
    """
    dir_path = os.path.dirname(filename)
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create directories when there is a
    # directory component. exist_ok avoids a check-then-create race.
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)
    with open(filename, 'wb') as f:
        pickle.dump(content, f)
def read_pickle(filename):
    """Load and return the object pickled in ``filename``.

    NOTE: unpickling can execute arbitrary code; only use this on files
    from a trusted source.
    """
    # The context manager closes the file even if unpickling raises.
    with open(filename, 'rb') as pkl_file:
        return pickle.load(pkl_file)
def pad_sentences(sentences, max_length, padding_word="</s>"):
    """
    Pads every sentence on the right with ``padding_word`` up to ``max_length`` items.

    ``sentences`` is a sequence of lists. Sentences that already have
    ``max_length`` or more items are returned unpadded and are NOT truncated.
    Returns a new list of new lists; the input sentences are not modified.
    """
    # A non-positive repeat count yields an empty list, so sentences at or
    # beyond max_length pass through unchanged.
    return [
        sentence + [padding_word] * (max_length - len(sentence))
        for sentence in sentences
    ]