-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmake_multi_bitext.py
More file actions
executable file
·29 lines (26 loc) · 1000 Bytes
/
make_multi_bitext.py
File metadata and controls
executable file
·29 lines (26 loc) · 1000 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/env python3
import os
from itertools import groupby
def _collect_split(split_dir):
    """Read one split directory and return ``(src_lines, tgt_lines)``.

    Only files ending in ``.src`` or ``.tgt`` are read; everything else
    (e.g. ``.idx`` files) is ignored. The first seven characters of each
    file name are split on ``'-'`` to obtain the source and target language
    tags (presumably two three-letter codes such as ``eng-deu`` -- confirm
    against the data layout).

    Each source line becomes ``<srclang> text <tgtlang>``; each target line
    is kept as-is. Lines are stripped of surrounding whitespace.

    Raises:
        ValueError: if a file name's seven-character prefix does not split
            into exactly two parts on ``'-'``.
    """
    # os.listdir() returns names in arbitrary order. Sorting makes groupby
    # see each language pair as a single contiguous run and, more
    # importantly, guarantees the .src and .tgt files are visited in the
    # same pair order, so line i of the source matches line i of the target.
    fnames = sorted(
        fname for fname in os.listdir(split_dir)
        if fname.endswith(('.src', '.tgt'))
    )
    src = []
    tgt = []
    for langpair, group in groupby(fnames, key=lambda fname: fname[:7]):
        srclang, tgtlang = langpair.split('-')
        for fname in group:
            path = os.path.join(split_dir, fname)
            # Explicit UTF-8: the corpus is multilingual, so the locale's
            # default encoding must not decide how it is decoded.
            with open(path, encoding='utf-8') as fin:
                if fname.endswith('.src'):
                    src.extend(
                        '<' + srclang + '> ' + line.strip() + ' <' + tgtlang + '>'
                        for line in fin
                    )
                else:
                    tgt.extend(line.strip() for line in fin)
    return src, tgt


def _write_lines(path, lines):
    """Write every item of *lines* to *path*, one per line, as UTF-8."""
    with open(path, 'w', encoding='utf-8') as fout:
        fout.writelines(line + '\n' for line in lines)


def make_multi_bitext(exp_dir='exp', out_dir='multi',
                      splits=('train', 'dev', 'test')):
    """Merge per-language-pair bitext files into one tagged bitext per split.

    For every split, reads ``<exp_dir>/<split>/*.src`` and ``*.tgt`` and
    writes ``<out_dir>/<split>.src`` and ``<out_dir>/<split>.tgt``.

    Args:
        exp_dir: directory containing one sub-directory per split.
        out_dir: directory for the merged files; created if missing.
        splits: names of the split sub-directories to process.
    """
    # exist_ok so that re-running the script does not crash on the
    # directory left behind by a previous run.
    os.makedirs(out_dir, exist_ok=True)
    for split in splits:
        src, tgt = _collect_split(os.path.join(exp_dir, split))
        _write_lines(os.path.join(out_dir, split + '.src'), src)
        _write_lines(os.path.join(out_dir, split + '.tgt'), tgt)


if __name__ == '__main__':
    make_multi_bitext()