run_experiments

#!/bin/bash

# Print the list of targets this script accepts, one per line, to stdout.
# Each target name corresponds to a `_sct_<target>` function in this file.
function _sct_help {
	echo  'Script options:'
	echo  '    setup		  	 - Install requirements in the virtual environment.'
	echo  '                                   Loads skip-thoughts embeddings and glove embeddings.'
	echo  ''
	echo  '    bert  			 - Runs bert classifier. Creates best predictions for labeled and unlabeled test set. '
	echo  '    simple			 - Runs models of paper FC-skip and LS-skip.'
	echo  '    word_based   		 - BiLSTM encoder and decoder with attention.'
	echo  '    sentiment_analysis   	 - Runs model that predicts polarity of the fifth sentence.'
	echo  '    predict_context  	         - Runs model that predicts the context of the fifth sentence.'
	# `all` previously repeated the predict_context description verbatim.
	echo  '    all			         - Runs all of the experiments above (bert, simple, word_based, sentiment_analysis, predict_context).'
	echo  ''
}

# Download URL ($1) to DEST ($2) unless DEST already exists.
# The data is written to "DEST.part" first and renamed only after wget
# succeeds, so a failed or interrupted download never leaves a truncated DEST
# behind that a later run would mistake for a complete file.
# Returns 0 if DEST already exists or was downloaded, non-zero otherwise.
# (The name deliberately does not start with `_sct_`, so it cannot be invoked
# as a command-line target.)
function __sct_fetch_if_missing {
    local url=$1
    local dest=$2
    local partial="${dest}.part"

    if [ -f "$dest" ]; then
        return 0
    fi
    # NOTE(review): --no-check-certificate disables TLS verification; it is
    # kept from the original command line -- confirm it is still required.
    if wget --no-check-certificate "$url" -O "$partial"; then
        mv -- "$partial" "$dest"
    else
        rm -f -- "$partial"
        echo "Error: failed to download ${dest}" >&2
        return 1
    fi
}

# Install the Python requirements, fetch the NLTK resources and download every
# data file that is not present yet (GloVe vectors, ROCStories CSVs and the
# skip-thoughts embeddings) below ./data, relative to the current directory.
# Each step is attempted even if an earlier one failed.
# Returns 0 if every step succeeded, 1 if any of them failed.
function _sct_setup {
    local status=0
    local polybox="https://polybox.ethz.ch/index.php/s"
    local glove_dir="data/glove-embeddings"
    local rocstories_dir="data/ROCStories"
    local skip_thoughts_dir="data/skip-thoughts"

    pip install -r requirements.txt || status=1
    python -c 'import nltk; nltk.download("punkt"); nltk.download("averaged_perceptron_tagger"); nltk.download("stopwords"); nltk.download("wordnet");' || status=1

    if [ ! -f "${glove_dir}/glove.6B.100d.txt" ]; then
        echo "Downloading glove embeddings..."
        # Run in a subshell: the caller's working directory is never changed,
        # and a failing `cd` aborts this step instead of downloading into (and
        # later climbing out of) the wrong directory.
        (
            mkdir -p "$glove_dir" || exit 1
            cd "$glove_dir" || exit 1
            # -O overwrites a stale archive from an earlier failed attempt;
            # without it wget would save the new copy as glove.6B.zip.1.
            wget https://nlp.stanford.edu/data/wordvecs/glove.6B.zip -O glove.6B.zip && \
                unzip glove.6B.zip && \
                rm glove.6B.zip glove.6B.200d.txt glove.6B.300d.txt glove.6B.50d.txt
        ) || status=1
    fi

    mkdir -p "$rocstories_dir" "$skip_thoughts_dir" || status=1

    __sct_fetch_if_missing "${polybox}/l2wM4RIyI3pD7Tl/download" \
        "${rocstories_dir}/train_stories.csv" || status=1
    __sct_fetch_if_missing "${polybox}/02IVLdBAgVcsJAx/download" \
        "${rocstories_dir}/cloze_test_val__spring2016 - cloze_test_ALL_val.csv" || status=1
    __sct_fetch_if_missing "${polybox}/C9sYaIebnJnHlqi/download" \
        "${rocstories_dir}/test-stories.csv" || status=1

    __sct_fetch_if_missing "${polybox}/NQ9OT8Xxvdxn3wo/download" \
        "${skip_thoughts_dir}/skip-thoughts-embeddings_train.npy" || status=1
    __sct_fetch_if_missing "${polybox}/10CivpGpg8O1Bfe/download" \
        "${skip_thoughts_dir}/skip-thoughts-embeddings_validation.npy" || status=1
    __sct_fetch_if_missing "${polybox}/PKQm7YuCMsPhBv6/download" \
        "${skip_thoughts_dir}/skip-thoughts-embeddings_test.npy" || status=1

    return "$status"
}
 
# Submit the BERT classifier (./src/bert/bert_sct.py) as an LSF job via bsub.
# Job output goes to lsf_bert.out; the Python script is given output and
# TF-Hub cache directories below /scratch/$USER and a results directory whose
# name encodes the epochs, synonym percentage and network settings.
# All settings are function-local so they do not leak into other targets, and
# every expansion is quoted so a value containing whitespace stays one argument.
# Returns the exit status of bsub.
function _sct_bert {
    local epochs=5
    local data_dir="./data/ROCStories"
    local percentage_synonyms=0.2
    local network="bidirectional-1024-1-1-True-lstm:highway-3"
    local learning_rate=2e-5
    local num_estimators=10
    local scratch="/scratch/${USER}"

    # Options consumed by bsub itself.
    local -a lsf_opts=(
        -n 2
        -W 03:59
        -R "rusage[mem=12000,ngpus_excl_p=1]"
        -R "select[gpu_model0==TeslaV100_SXM2_32GB]"
        -o lsf_bert.out
    )
    # Command line executed inside the job.
    local -a job=(
        python ./src/bert/bert_sct.py
        --data_dir "$data_dir"
        --output_dir "${scratch}/output_dir"
        --tfhub_cache_dir "${scratch}/tfhub_cache_dir"
        --num_epochs "$epochs"
        --learning_rate "$learning_rate"
        --num_estimators "$num_estimators"
        --network "$network"
        --percentage_synonyms "$percentage_synonyms"
        --save_results_dir "./bert_predictions_${epochs}_${percentage_synonyms}_${network}"
    )

    bsub "${lsf_opts[@]}" "${job[@]}"
}
 
# Submit ./src/simple_effective_approach.py (the FC-skip / LS-skip models) as
# an LSF job via bsub. The job log name encodes the train-on-validation flag,
# the mode and the learning rate.
#
# The original command line passed --data_dir and --log_path twice: first with
# a hard-coded path in another user's home directory and a /scratch path, then
# again with ./data/ROCStories and ./log_path. Only the second pair is kept.
# NOTE(review): this assumes the Python argument parser keeps the last
# occurrence of a repeated flag (as argparse does), in which case the values
# kept here are the ones that were already in effect -- confirm.
# Returns the exit status of bsub.
function _sct_simple {
    local log_path="./log_path"
    local data_dir="./data/ROCStories"
    local epochs=10
    local batch_size=32
    local learning_rate=1e-3
    local train_on_validation=1
    local mode="LS-skip"

    # The requested memory depends on the train-on-validation flag:
    # 18000 when it is 0, 5000 otherwise.
    local memory
    if [ "$train_on_validation" -eq 0 ]; then
        memory=18000
    else
        memory=5000
    fi

    # Options consumed by bsub itself.
    local -a lsf_opts=(
        -o "lsf_simple_${train_on_validation}_${mode}_${learning_rate}.out"
        -n 2
        -W 03:59
        -R "rusage[mem=${memory},ngpus_excl_p=1]"
    )
    # Command line executed inside the job.
    local -a job=(
        python ./src/simple_effective_approach.py
        --num_epochs "$epochs"
        --learning_rate "$learning_rate"
        --verbose True
        --train_on_validation "$train_on_validation"
        --mode "$mode"
        --batch_size "$batch_size"
        --log_path "$log_path"
        --data_dir "$data_dir"
    )

    bsub "${lsf_opts[@]}" "${job[@]}"
}

# Submit src/word_based_model.py as an LSF job via bsub; the job output is
# written to lsf_run_word_based.out. Returns the exit status of bsub.
function _sct_word_based {
    # Options consumed by bsub itself (the resource string is one argument).
    local -a lsf_opts=(
        -n 2
        -W 4:00
        -R "rusage[mem=2048, ngpus_excl_p=1]"
        -o lsf_run_word_based.out
    )
    bsub "${lsf_opts[@]}" python src/word_based_model.py
}

# Submit src/sentiment_analysis.py as an LSF job via bsub; the job output is
# written to lsf_run_sentiment_analysis.out. Returns the exit status of bsub.
function _sct_sentiment_analysis {
    local resources="rusage[mem=64192, ngpus_excl_p=1]"
    local job_log="lsf_run_sentiment_analysis.out"

    bsub -n 1 -W 8:00 -R "$resources" -o "$job_log" \
        python src/sentiment_analysis.py
}

# Submit src/predict_skip.py as an LSF job via bsub; the job output is written
# to lsf_run_predict_context.out. Returns the exit status of bsub.
function _sct_predict_context {
    # bsub plus its own options; the job command is appended when it is run.
    local -a submit=(
        bsub
        -n 1
        -W 8:00
        -R "rusage[mem=64192, ngpus_excl_p=1]"
        -R "select[gpu_model0==TeslaV100_SXM2_32GB]"
        -o lsf_run_predict_context.out
    )
    "${submit[@]}" python src/predict_skip.py
}

# Run every experiment target one after another, in this order: bert, simple,
# word_based, sentiment_analysis, predict_context. A failing step does not
# stop the remaining ones; the return status is that of the last step.
function _sct_all {
    local experiment
    for experiment in bert simple word_based sentiment_analysis predict_context; do
        "_sct_${experiment}"
    done
}


# Dispatch every command-line argument to the function named `_sct_<target>`.
#   - With no arguments, print the option list (via _sct_help) and return 1
#     instead of silently doing nothing.
#   - An unknown target is reported on stderr; the remaining targets still run.
#   - The function name is looked up and called directly, so no `eval` is
#     needed and an argument containing whitespace can never be split into
#     several words.
# Returns 0 if every target existed and succeeded, 1 otherwise.
function __sct_main {
    local target
    local status=0

    if [ "$#" -eq 0 ]; then
        _sct_help
        return 1
    fi

    for target in "$@"; do
        if declare -F "_sct_${target}" > /dev/null; then
            "_sct_${target}" || status=1
        else
            echo "Error: ${target} target does not exist" >&2
            status=1
        fi
    done

    return "$status"
}

# Last command of the script: its status becomes the script's exit status.
__sct_main "$@"