template.py
class BaseTemplate(object):
    def __init__(self):
        self.hparams = {
            ### User-specified directories ###
            "workdir": "~/sockeye_trial/",
            "datadir": "$workdir/data",
            "modeldir": "$workdir/model",
            "rootdir": "/home/hltcoe/kduh/src/mt/sockeye-recipes/",
            ### Language pair (source and target) ###
            # Defaults match the bundled sample-de-en (German-English) data
            "src_lang": "de",
            "trg_lang": "en",
            ### Tokenized training and validation data ###
            "train_tok": "$workdir/sample-de-en/train",
            "valid_tok": "$workdir/sample-de-en/valid",
            ### Number of symbols to use for BPE ###
            "bpe_symbols_src": "4000",
            "bpe_symbols_trg": "4000",
            ### Filename for BPE-processed bitext file ###
            # Note: the following default names should be fine for most use cases
            "train_bpe": "$datadir/train.bpe-${bpe_symbols_src}",
            "valid_bpe": "$datadir/valid.bpe-${bpe_symbols_src}",
            ### Filename for BPE vocabulary ###
            # Note: the following default names should be fine for most use cases
            # Note: bpe_vocab_src will be needed for applying BPE to test, in translate.sh
            "bpe_vocab_src": "${train_bpe}.$src.bpe_vocab",
            "bpe_vocab_trg": "${train_bpe}.$trg.bpe_vocab",
            # Model architecture
            "num_embed": "50:50",
            "rnn_num_hidden": "50",
            "attention_type": "dot",
            "num_layers": "1",
            "rnn_cell_type": "lstm",
            # Training configuration
            "max_seq_len": "100:100",
            "num_words": "4000:4000",
            "word_min_count": "1:1",
            "batch_size": "64",
            "min_num_epochs": "0",
            "embed_dropout": ".0:.0",
            # Logging and stopping condition
            "checkpoint_frequency": "100",
            "max_updates": "400",
            "keep_last_params": "-1",
        }
        self.template = '''
#####################################################################
# Hyperparameters for preprocess-bpe.sh and train.sh #
# #
# Overview: #
# - "workdir" corresponds a group of preprocessed bitext and models #
# for a given dataset. Each "workdir" can contain multiple #
# "datadir" and "modeldir" if desired #
# - "datadir" stores the BPE-preprocessed training and validation #
# bitext files #
# - "modeldir" is generated by Sockeye and stores all training info #
# - "rootdir" is path to your installation of sockeye-recipes, #
# e.g. ~/src/sockeye-recipes #
# #
# preprocess-bpe.sh: #
# - input: Tokenized bitext for training ("train_tok") and #
# validation ("valid_tok") #
# - output: BPE-preprocessed bitext ("train_bpe", "valid_bpe") #
# and vocabulary ("bpe_vocab_src", "bpe_vocab_trg") #
# - main hyperparameters: number of BPE symbols for source & target #
# #
# train.sh: #
# - input: BPE-preprocessed bitext ("train_bpe", "valid_bpe") #
# - output: "modeldir", which contains all training info and can #
# be used to translate #
# - main hyperparameters: many! see below #
#####################################################################
#####################################################################
# (0) General settings (to be modified for each project) #
#####################################################################
### User-specified directories ###
workdir=%s
datadir=%s
modeldir=%s
rootdir=%s
### Language pair (source and target) ###
# Note: We assume all bitext files use these as suffixes.
# e.g. $train_tok.$src and $train_tok.$trg refer to the source and target sides
src=%s
trg=%s
### Tokenized training and validation data ###
# Note: we assume tokenization is already done and will only run BPE
# For tokenization and other preprocessing, see preprocess-tokenize.sh
# which does not use this hyperparam.txt file
train_tok=%s
valid_tok=%s
#####################################################################
# (1) preprocess-bpe.sh settings (modify if needed) #
#####################################################################
### Number of symbols to use for BPE ###
# Note: we perform source and target BPE separately
# This corresponds to initial source (src) and target (trg) vocab size
bpe_symbols_src=%s
bpe_symbols_trg=%s
### Filename for BPE-processed bitext file ###
# Note: the following default names should be fine for most use cases
train_bpe=%s
valid_bpe=%s
### Filename for BPE vocabulary ###
# Note: the following default names should be fine for most use cases
# Note: bpe_vocab_src will be needed for applying BPE to test, in translate.sh
bpe_vocab_src=%s
bpe_vocab_trg=%s
#####################################################################
# (2) train.sh settings (modify if needed) #
#####################################################################
# Model architecture
num_embed=%s
rnn_num_hidden=%s
attention_type=%s
num_layers=%s
rnn_cell_type=%s
# Training configuration
max_seq_len=%s
num_words=%s
word_min_count=%s
batch_size=%s
min_num_epochs=%s
embed_dropout=%s
# Logging and stopping condition
checkpoint_frequency=%s
max_updates=%s
keep_last_params=%s
'''

    def hparam_list(self):
        # Names of all hyperparameters, in the order they appear in the template.
        return list(self.hparams.keys())

    def render(self):
        # Fill the %s placeholders; relies on dict insertion order (Python 3.7+).
        hparams = tuple(self.hparams.values())
        return self.template % hparams

    def update(self, updated_dict):
        # Override the default hyperparameters with user-supplied values.
        self.hparams.update(updated_dict)
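In the recipe this class is normally driven by a wrapper script, but a minimal standalone sketch of how it might be used follows. The output filename hyperparams.txt and the override values are assumptions for illustration; the rendered text is shell-style assignments that appear intended to be sourced by preprocess-bpe.sh and train.sh.

if __name__ == "__main__":
    # Illustrative sketch (not part of the recipe): filename and values are assumptions.
    t = BaseTemplate()
    # Any key returned by hparam_list() can be overridden before rendering.
    t.update({
        "bpe_symbols_src": "8000",
        "bpe_symbols_trg": "8000",
        "batch_size": "128",
    })
    # render() substitutes the values into the %s placeholders, producing a
    # shell-style hyperparameter file for the preprocessing and training scripts.
    with open("hyperparams.txt", "w") as f:
        f.write(t.render())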