-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprep_bedit_data.sh
executable file
·98 lines (81 loc) · 2.83 KB
/
prep_bedit_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/bin/bash
# Copyright 2019 Nagoya University (Takenori Yoshimura)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
# Prepare alignment-derived JSON data in numbered stages (0-4 below).
# Each stage block runs only when its gate number lies within
# [stage, stop_stage].

# Source the environment files from the current directory; abort if either
# one is missing or fails.
# NOTE(review): presumably path.sh sets up PATH and cmd.sh the job-launch
# commands, as in Kaldi/ESPnet recipes - confirm.
. ./path.sh || exit 1;
. ./cmd.sh || exit 1;
# general configuration
# These defaults must be assigned before parse_options.sh is sourced below.
# NOTE(review): presumably parse_options.sh lets "--name value" arguments
# override them - confirm against utils/parse_options.sh.
stage=-1
stop_stage=100
# NOTE(review): nj, verbose, seed, the feature-extraction values and tag are
# not referenced by any stage in this script.
nj=16 # number of parallel jobs
verbose=1 # verbose option (if set > 1, get more log)
seed=1 # random seed number
# feature extraction related
fs=16000 # sampling frequency
fmax=7600 # maximum frequency
fmin=80 # minimum frequency
n_mels=80 # number of mel basis
n_fft=1024 # number of fft points
n_shift=200 # number of shift points
win_length=800 # window length
# exp tag
tag="" # tag for managing experiments.
. utils/parse_options.sh || exit 1;
# Enable strict mode only after option parsing:
# -e exit on error, -u error on undefined variable,
# -o pipefail fail a pipeline if any stage of it fails.
# Command tracing (-x) is NOT enabled here.
set -e
set -u
set -o pipefail
# Fixed locations, assigned after option parsing.
# dumpdir: root directory holding the per-data-set folders and scripts/.
dumpdir=hifitts_kaldi
# dict: lexicon file handed to data2json_cs.py as --w2p_dict.
dict=lexicon.txt
# data_set: name(s) of the data set(s) the stage loops iterate over.
data_set=test
# Stage 0: run local/phn_data_prep.sh on the tri5a alignment directory and
# place its output under ${dumpdir}/${data_set}.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    printf '%s\n' "stage 0: Data preparation"
    ali_dir="kaldi/egs/hifitts/s5_16k/exp"
    local/phn_data_prep.sh "${ali_dir}/tri5a_ali" "${dumpdir}/${data_set}"
fi
# Stage 1: sort phn_text and phn_duration in place, then call
# data2json_cs.py with the text, the two sorted files and the lexicon to
# write <subset>.json for each data set.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    printf '%s\n' "stage 1: Ali-information to json"
    # data_set is left unquoted on purpose so that a space-separated list
    # is split into individual subsets.
    for subset in ${data_set}; do
        subset_dir="${dumpdir}/${subset}"
        # Sort each alignment file onto itself (-o allows input == output).
        for ali_file in phn_text phn_duration; do
            sort -o "${subset_dir}/${ali_file}" "${subset_dir}/${ali_file}"
        done
        json_args=(
            --text "${subset_dir}/text"
            --phn_text "${subset_dir}/phn_text"
            --w2p_dict "${dict}"
            --phn_duration "${subset_dir}/phn_duration"
            --output_json "${subset_dir}/${subset}.json"
        )
        python "${dumpdir}/scripts/data2json_cs.py" "${json_args[@]}"
    done
fi
# For training data
# Stage 2: call words_position.py on each <subset>.json, writing
# phone_positions.json and frame_positions.json next to it.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    printf '%s\n' "For training" "stage 2: words position"
    # data_set is left unquoted on purpose so that a space-separated list
    # is split into individual subsets.
    for subset in ${data_set}; do
        subset_dir="${dumpdir}/${subset}"
        position_args=(
            --json "${subset_dir}/${subset}.json"
            --phone_output "${subset_dir}/phone_positions.json"
            --frame_output "${subset_dir}/frame_positions.json"
        )
        python "${dumpdir}/scripts/words_position.py" "${position_args[@]}"
    done
fi
# For decoding data
# Stage 3: call random_word_mask.py on each <subset>.json, writing the
# result to position.json in the same directory.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    printf '%s\n' "stage 3: Mask words in json data"
    # data_set is left unquoted on purpose so that a space-separated list
    # is split into individual subsets.
    for subset in ${data_set}; do
        subset_dir="${dumpdir}/${subset}"
        mask_args=(
            --json "${subset_dir}/${subset}.json"
            --output "${subset_dir}/position.json"
        )
        python "${dumpdir}/scripts/random_word_mask.py" "${mask_args[@]}"
    done
fi
# Stage 4: call json2add_json.py on each position.json, writing
# frame_positions.json and phone_positions.json in the same directory.
#
# The gate now tests against 4, matching the stage number this block
# announces. It previously reused stage 3's gate (-le 3 / -ge 3), so
# "--stage 4" could never reach this block and "--stop_stage 3" did not
# stop before it.
#
# NOTE(review): the two output paths are the same files stage 2 writes, so
# running this stage overwrites them - presumably intended for decoding;
# confirm.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    echo "stage 4: Json to Espnet addable json"
    # data_set is left unquoted on purpose so that a space-separated list
    # is split into individual subsets.
    for x in ${data_set}; do
        python "${dumpdir}/scripts/json2add_json.py" \
            --json "${dumpdir}/${x}/position.json" \
            --frame_positions_out "${dumpdir}/${x}/frame_positions.json" \
            --phone_positions_out "${dumpdir}/${x}/phone_positions.json"
    done
fi