-
Notifications
You must be signed in to change notification settings - Fork 1
/
experiments.conf
169 lines (153 loc) · 4.24 KB
/
experiments.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# Word embeddings.
# Embedding descriptor: file location plus a size field
# (presumably the vector dimensionality; confirm against the loader).
glove_300d {
  path = "glove.840B.300d.txt"
  size = 300
}
# Same shape as glove_300d, but pointing at the ".filtered" file.
glove_300d_filtered {
  path = "glove.840B.300d.txt.filtered"
  size = 300
}
# Embedding descriptor for the glove_50_300_2 file, with size 300.
glove_300d_2w {
  path = "glove_50_300_2.txt"
  size = 300
}
# Same shape as glove_300d_2w, but pointing at the ".filtered" file.
glove_300d_2w_filtered {
  path = "glove_50_300_2.txt.filtered"
  size = 300
}
# Main configuration.
# Base experiment. The other experiment objects in this file extend it,
# directly or transitively, via ${rst_antecedent_selection}{ ... } and
# override only the keys they list.
rst_antecedent_selection {
  # --- Computation limits ---
  max_top_antecedents = 150
  max_training_words = 1000

  # --- Model hyperparameters ---
  filter_widths = [3, 4, 5]
  filter_size = 50
  char_embedding_size = 8
  char_vocab_path = "char_vocab.english.txt"
  # Both embedding entries are substitutions that resolve to the
  # top-level glove_* objects (each carrying a path and a size).
  context_embeddings = ${glove_300d_filtered}
  head_embeddings = ${glove_300d_2w_filtered}
  contextualization_size = 200
  contextualization_layers = 3
  ffnn_size = 150
  ffnn_depth = 2
  feature_size = 20
  max_span_width = 30
  use_metadata = true
  use_features = true
  model_heads = true
  lm_layers = 4
  lm_size = 1024

  # --- Learning hyperparameters ---
  max_gradient_norm = 5.0
  lstm_dropout_rate = 0.4
  lexical_dropout_rate = 0.5
  dropout_rate = 0.2
  optimizer = "adam"
  learning_rate = 0.001
  decay_rate = 0.999
  decay_frequency = 100

  # --- Data locations, schedule and logging ---
  train_path = "rst.train.bridging.jsonlines"
  # Empty in the base; the isnotes and bashi experiments set it to a file.
  second_train_path = ""
  eval_path = "rst.dev.bridging.jsonlines"
  lm_path = "bert_features.hdf5"
  test_path = "rst.test.bridging.jsonlines"
  genres = ["bc", "bn", "mz", "nw", "pt", "tc", "wb"]
  eval_frequency = 500
  report_frequency = 100
  log_root = "logs"
  max_step = 10000

  # --- Task set-up switches ---
  # The *_full_bridging objects flip several of these.
  use_gold_bridging_anaphora = true
  use_gold_mentions = false
  train_with_coref = true
  shared_depth = 1
  remove_coref_anaphora = false
  ntimes_negative_examples = 1
  undersampling_probability = [1.0, 1.0]
  second_undersampling_probability = [1.0, 1.0]
  # 0 in the base; the derived antecedent-selection experiments use 10.
  cross_validation_fold = 0
  has_multi_bridging_ant = false
}
# Full-bridging variant of rst_antecedent_selection. Everything not
# listed is inherited; the keys below replace the parent's values
# (gold anaphora off, gold mentions on, 2x negative examples,
# reduced undersampling probabilities, coref anaphora removed).
rst_full_bridging = ${rst_antecedent_selection} {
  use_gold_bridging_anaphora = false
  use_gold_mentions = true
  ntimes_negative_examples = 2
  undersampling_probability = [0.07, 0.14]
  remove_coref_anaphora = true
}
# Antecedent-selection experiment trained on pear.bridging.jsonlines.
# Inherits from rst_antecedent_selection; eval_path and test_path are
# NOT overridden, so they keep the rst.* values from the base.
pear_antecedent_selection = ${rst_antecedent_selection} {
  train_path = "pear.bridging.jsonlines"
  # Same value as the base object; restated explicitly.
  lm_path = "bert_features.hdf5"
  genres = ["nw", "pe", "tr"]
  max_step = 2500
  cross_validation_fold = 10
  # Key not present in the base object; introduced here.
  eval_on_test_part_only = true
}
# Full-bridging variant of pear_antecedent_selection: same data and
# schedule, with the task switches below overridden.
pear_full_bridging = ${pear_antecedent_selection} {
  use_gold_bridging_anaphora = false
  use_gold_mentions = true
  ntimes_negative_examples = 2
  undersampling_probability = [0.1, 0.1]
  remove_coref_anaphora = true
}
# Antecedent-selection experiment trained on trains93.bridging.jsonlines.
# Apart from train_path, the overrides match pear_antecedent_selection.
trains93_antecedent_selection = ${rst_antecedent_selection} {
  train_path = "trains93.bridging.jsonlines"
  # Same value as the base object; restated explicitly.
  lm_path = "bert_features.hdf5"
  genres = ["nw", "pe", "tr"]
  max_step = 2500
  cross_validation_fold = 10
  # Key not present in the base object; introduced here.
  eval_on_test_part_only = true
}
# Full-bridging variant of trains93_antecedent_selection: same data and
# schedule, with the task switches below overridden.
trains93_full_bridging = ${trains93_antecedent_selection} {
  use_gold_bridging_anaphora = false
  use_gold_mentions = true
  ntimes_negative_examples = 2
  undersampling_probability = [0.1, 0.1]
  remove_coref_anaphora = true
}
# Antecedent-selection experiment with isnotes as the primary training
# file and bashi as the second one. genres is not overridden, so the
# base list is inherited.
isnotes_antecedent_selection = ${rst_antecedent_selection} {
  train_path = "isnotes.bridging.jsonlines"
  # Same value as the base object; restated explicitly.
  lm_path = "bert_features.hdf5"
  max_step = 2500
  second_train_path = "bashi.bridging.jsonlines"
  # Key not present in the base object; introduced here.
  skip_comparative_bridging = true
  cross_validation_fold = 10
}
# Full-bridging variant of isnotes_antecedent_selection. The two
# undersampling pairs are the mirror image of bashi_full_bridging's
# (primary here == second there, and vice versa).
isnotes_full_bridging = ${isnotes_antecedent_selection} {
  use_gold_bridging_anaphora = false
  use_gold_mentions = true
  ntimes_negative_examples = 2
  undersampling_probability = [0.09, 0.22]
  second_undersampling_probability = [0.03, 0.1]
  remove_coref_anaphora = true
}
# Antecedent-selection experiment with bashi as the primary training
# file and isnotes as the second one (the reverse of the isnotes set-up).
bashi_antecedent_selection = ${rst_antecedent_selection} {
  train_path = "bashi.bridging.jsonlines"
  # Same value as the base object; restated explicitly.
  lm_path = "bert_features.hdf5"
  second_train_path = "isnotes.bridging.jsonlines"
  max_step = 2500
  cross_validation_fold = 10
  # The only experiment in this section that turns this flag on
  # (the base object sets it to false).
  has_multi_bridging_ant = true
}
# Full-bridging variant of bashi_antecedent_selection. The two
# undersampling pairs are the mirror image of isnotes_full_bridging's.
bashi_full_bridging = ${bashi_antecedent_selection} {
  use_gold_bridging_anaphora = false
  use_gold_mentions = true
  ntimes_negative_examples = 2
  undersampling_probability = [0.03, 0.1]
  second_undersampling_probability = [0.09, 0.22]
  remove_coref_anaphora = true
}
# Antecedent-selection experiment trained on scicorp.bridging.jsonlines.
# Only the training file, step budget and fold count differ from the
# base; genres and second_train_path are inherited unchanged.
scicorp_antecedent_selection = ${rst_antecedent_selection} {
  train_path = "scicorp.bridging.jsonlines"
  # Same value as the base object; restated explicitly.
  lm_path = "bert_features.hdf5"
  max_step = 2500
  cross_validation_fold = 10
}
# Full-bridging variant of scicorp_antecedent_selection: same data and
# schedule, with the task switches below overridden.
scicorp_full_bridging = ${scicorp_antecedent_selection} {
  use_gold_bridging_anaphora = false
  use_gold_mentions = true
  ntimes_negative_examples = 2
  undersampling_probability = [0.05, 0.31]
  remove_coref_anaphora = true
}