-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_experiments
executable file
·113 lines (98 loc) · 4.82 KB
/
run_experiments
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/bin/bash
#######################################
# Print the list of targets this script accepts.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
function _sct_help {
  printf '%s\n' \
    'Script options:' \
    ' setup - Install requirements in the virtual environment.' \
    ' Loads skip-thoughts embeddings and glove embeddings.' \
    '' \
    ' bert - Runs bert classifier. Creates best predictions for labeled and unlabeled test set. ' \
    ' simple - Runs models of paper FC-skip and LS-skip.' \
    ' word_based - BiLSTM encoder and decoder with attention.' \
    ' sentiment_analysis - Runs model that predicts polarity of the fifth sentence.' \
    ' predict_context - Runs model that predicts the context of the fifth sentence.' \
    ' all - Runs all experiments: bert, simple, word_based, sentiment_analysis and predict_context.' \
    ''
}
#######################################
# Download a URL to a destination file unless that file already exists.
# The payload is written to "<dest>.part" and renamed only after wget
# succeeds, so an interrupted download is retried on the next run instead of
# being mistaken for a complete file.
# Arguments: $1 - URL to fetch
#            $2 - destination path
# Returns:   0 if the file exists or was downloaded, 1 on download failure
#######################################
function __sct_download_if_missing {
  local url=$1
  local dest=$2

  if [ -f "${dest}" ]; then
    return 0
  fi

  # NOTE(review): --no-check-certificate disables TLS verification; it is kept
  # from the original invocation - confirm whether it is still required.
  if wget --no-check-certificate "${url}" -O "${dest}.part"; then
    mv -- "${dest}.part" "${dest}"
  else
    rm -f -- "${dest}.part"
    echo "Error: could not download ${dest}" >&2
    return 1
  fi
}

#######################################
# Install the Python requirements and NLTK corpora, then fetch the glove
# embeddings, the ROCStories csv files and the skip-thoughts embeddings into
# ./data (relative to the current directory). Files that already exist are
# not downloaded again. The caller's working directory is never changed.
# Arguments: none
# Returns:   0 when every data file is in place, 1 otherwise
#######################################
function _sct_setup {
  local status=0
  local base_url="https://polybox.ethz.ch/index.php/s"
  local stories_dir="data/ROCStories"
  local skip_dir="data/skip-thoughts"
  local glove_dir="data/glove-embeddings"

  pip install -r requirements.txt
  python -c 'import nltk; nltk.download("punkt"); nltk.download("averaged_perceptron_tagger"); nltk.download("stopwords"); nltk.download("wordnet");'

  mkdir -p "${glove_dir}" "${stories_dir}" "${skip_dir}" || return 1

  if [ ! -f "${glove_dir}/glove.6B.100d.txt" ]; then
    echo "Downloading glove embeddings..."
    # Subshell: the cd stays local, and a failed cd aborts only this step
    # instead of downloading into (and later leaving) the wrong directory.
    (
      cd "${glove_dir}" || exit 1
      wget https://nlp.stanford.edu/data/wordvecs/glove.6B.zip -O glove.6B.zip && \
        unzip glove.6B.zip && \
        rm -f glove.6B.zip glove.6B.200d.txt glove.6B.300d.txt glove.6B.50d.txt
    ) || status=1
  fi

  __sct_download_if_missing "${base_url}/l2wM4RIyI3pD7Tl/download" \
    "${stories_dir}/train_stories.csv" || status=1
  __sct_download_if_missing "${base_url}/02IVLdBAgVcsJAx/download" \
    "${stories_dir}/cloze_test_val__spring2016 - cloze_test_ALL_val.csv" || status=1
  __sct_download_if_missing "${base_url}/C9sYaIebnJnHlqi/download" \
    "${stories_dir}/test-stories.csv" || status=1

  __sct_download_if_missing "${base_url}/NQ9OT8Xxvdxn3wo/download" \
    "${skip_dir}/skip-thoughts-embeddings_train.npy" || status=1
  __sct_download_if_missing "${base_url}/10CivpGpg8O1Bfe/download" \
    "${skip_dir}/skip-thoughts-embeddings_validation.npy" || status=1
  __sct_download_if_missing "${base_url}/PKQm7YuCMsPhBv6/download" \
    "${skip_dir}/skip-thoughts-embeddings_test.npy" || status=1

  return "${status}"
}
#######################################
# Submit the bert classifier (src/bert/bert_sct.py) through bsub.
# All settings are local to this function so they cannot leak into, or be
# picked up from, other targets run in the same invocation.
# Globals:   USER (read) - used to build the /scratch output directories
# Arguments: none
# Outputs:   job output is directed by bsub to lsf_bert.out
# Returns:   exit status of bsub
#######################################
function _sct_bert {
  local epochs=5
  local data_dir="./data/ROCStories"
  local percentage_synonyms=0.2
  local network="bidirectional-1024-1-1-True-lstm:highway-3"
  local learning_rate=2e-5
  local num_estimators=10
  local scratch_dir="/scratch/${USER}"

  # The results directory name encodes the settings that produced it.
  local results_dir="./bert_predictions_${epochs}_${percentage_synonyms}_${network}"

  bsub -n 2 -W 03:59 \
    -R "rusage[mem=12000,ngpus_excl_p=1]" \
    -R "select[gpu_model0==TeslaV100_SXM2_32GB]" \
    -o lsf_bert.out \
    python ./src/bert/bert_sct.py \
    --data_dir "${data_dir}" \
    --output_dir "${scratch_dir}/output_dir" \
    --tfhub_cache_dir "${scratch_dir}/tfhub_cache_dir" \
    --num_epochs "${epochs}" \
    --learning_rate "${learning_rate}" \
    --num_estimators "${num_estimators}" \
    --network "${network}" \
    --percentage_synonyms "${percentage_synonyms}" \
    --save_results_dir "${results_dir}"
}
#######################################
# Submit src/simple_effective_approach.py (the FC-skip / LS-skip models)
# through bsub.
# All settings are local to this function so they cannot leak into, or be
# picked up from, other targets run in the same invocation.
# Globals:   USER (read) - used to build the /scratch log directory
# Arguments: none
# Outputs:   job output is directed by bsub to
#            lsf_simple_<train_on_validation>_<mode>_<learning_rate>.out
# Returns:   exit status of bsub
#######################################
function _sct_simple {
  local log_path="./log_path"
  local data_dir="./data/ROCStories"
  local epochs=10
  local batch_size=32
  local learning_rate=1e-3
  local train_on_validation=1
  local mode="LS-skip"

  # The requested memory depends on whether the validation set is used for
  # training: 18000 when it is not (0), 5000 otherwise.
  local memory=5000
  if [ "${train_on_validation}" -eq 0 ]; then
    memory=18000
  fi

  local -a script_args
  script_args=(
    # NOTE(review): --data_dir and --log_path are each passed twice, first
    # with a user-specific cluster path and again at the end of the list.
    # Both occurrences are kept in their original order; which one takes
    # effect depends on the script's argument parser - confirm and drop the
    # redundant pair.
    --data_dir /cluster/home/sanagnos/NLU/project2/data
    --log_path "/scratch/${USER}/log_path"
    --num_epochs "${epochs}"
    --learning_rate "${learning_rate}"
    --verbose True
    --train_on_validation "${train_on_validation}"
    --mode "${mode}"
    --batch_size "${batch_size}"
    --log_path "${log_path}"
    --data_dir "${data_dir}"
  )

  bsub -o "lsf_simple_${train_on_validation}_${mode}_${learning_rate}.out" \
    -n 2 -W 03:59 \
    -R "rusage[mem=${memory},ngpus_excl_p=1]" \
    python ./src/simple_effective_approach.py "${script_args[@]}"
}
#######################################
# Submit src/word_based_model.py through bsub.
# Arguments: none
# Outputs:   job output is directed by bsub to lsf_run_word_based.out
# Returns:   exit status of bsub
#######################################
function _sct_word_based {
  local -a job_options
  job_options=(
    -n 2
    -W 4:00
    -R "rusage[mem=2048, ngpus_excl_p=1]"
    -o lsf_run_word_based.out
  )

  bsub "${job_options[@]}" python src/word_based_model.py
}
#######################################
# Submit src/sentiment_analysis.py through bsub.
# Arguments: none
# Outputs:   job output is directed by bsub to lsf_run_sentiment_analysis.out
# Returns:   exit status of bsub
#######################################
function _sct_sentiment_analysis {
  local -a job_options
  job_options=(
    -n 1
    -W 8:00
    -R "rusage[mem=64192, ngpus_excl_p=1]"
    -o lsf_run_sentiment_analysis.out
  )

  bsub "${job_options[@]}" python src/sentiment_analysis.py
}
#######################################
# Submit src/predict_skip.py through bsub.
# Arguments: none
# Outputs:   job output is directed by bsub to lsf_run_predict_context.out
# Returns:   exit status of bsub
#######################################
function _sct_predict_context {
  local -a job_options
  job_options=(
    -n 1
    -W 8:00
    -R "rusage[mem=64192, ngpus_excl_p=1]"
    -R "select[gpu_model0==TeslaV100_SXM2_32GB]"
    -o lsf_run_predict_context.out
  )

  bsub "${job_options[@]}" python src/predict_skip.py
}
#######################################
# Run every experiment target in a fixed order: bert, simple, word_based,
# sentiment_analysis, predict_context.
# Arguments: none
# Returns:   exit status of the last target (predict_context)
#######################################
function _sct_all {
  local experiment
  for experiment in bert simple word_based sentiment_analysis predict_context; do
    "_sct_${experiment}"
  done
}
#######################################
# Run each requested target in order by calling the matching _sct_<target>
# function. Unknown targets are reported on stderr and skipped; the
# remaining targets still run.
# Arguments: $@ - target names (e.g. setup, bert, all)
# Outputs:   an error line on stderr for every unknown target
# Returns:   0 if every target existed and succeeded, 1 otherwise
#######################################
function __sct_dispatch {
  local target
  local status=0

  for target in "$@"; do
    # declare -F only succeeds for a defined function, and the name is
    # invoked directly, so the argument is never re-parsed as shell code.
    if declare -F "_sct_${target}" > /dev/null; then
      "_sct_${target}" || status=1
    else
      echo "Error: ${target} target does not exist" >&2
      status=1
    fi
  done

  return "${status}"
}

__sct_dispatch "$@"