; thualign/configs/user/spanish.config

[DEFAULT]

; Data paths: change these to point at your own data.
; The values below reference the datadir key as ${datadir}.
; Every list names two files: the .es file first, the .en file second.
datadir = corpus-es
train_input = ['${datadir}/corpus.32k.es.shuf', '${datadir}/corpus.32k.en.shuf']
valid_input = ['${datadir}/validation.32k.es', '${datadir}/validation.32k.en']
vocab = ['${datadir}/vocab.32k.es.txt', '${datadir}/vocab.32k.en.txt']
test_input = ['${datadir}/test.32k.es', '${datadir}/test.32k.en']
alignment_input = ['${datadir}/sentences.32k.es', '${datadir}/sentences.32k.en']
test_answers = ['${datadir}/answers.es', '${datadir}/answers.en']

; Experiment directory: change these to your own experiment directory.
; All results are saved in the directory named by the output key,
; which is built from exp_dir and label.
exp_dir = spanish-output
label = output
output = ${exp_dir}/${label}

; Machine configuration: change these to match your own machine.
; The actual training batch is batch_size * update_cycle * len(device_list);
; with the values below that is 1500 * 24 * 1 = 36000 (36k).
; NOTE(review): the unit of the 36k figure is not stated here; confirm.
batch_size = 1500
update_cycle = 24
device_list = [0]
half = False

; Model configuration.
; The hyperparameters [alpha, beta, lamb, th] are the same as described in the paper.
; NOTE(review): by name these appear to be agree_alpha, entropy_beta,
; renorm_lamb and extract_th below; confirm against the paper.
model = mask_align
agree_alpha = 5
entropy_loss = True
entropy_beta = 1
renorm_lamb = 0.05
extract_th = 0.2