diff --git a/scripts/run_report_confidence.sh b/scripts/run_report_confidence.sh new file mode 100755 index 0000000..fd3b494 --- /dev/null +++ b/scripts/run_report_confidence.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# +# Prepare summary report per measure with confidence intervals +set -e + +usage="Usage: $0 OUT_DIR MEASURE ..." + +if [ "$#" -lt 2 ]; then + echo $usage + exit 1 +fi + +outdir=$1; shift # directory to which results are written + +MEASURES=( + "strong_mention_match" + "strong_link_match" + "strong_nil_match" + "strong_all_match" + "strong_typed_all_match" + "entity_ceaf" + ) + +for measure in ${@} +do + echo "INFO preparing $measure report.." + + # INITIALISE REPORT HEADER + report=$outdir/00report.$measure + echo -e "90%(\t95%(\t99%(\tscore\t)99%\t)95%\t)90%\tsystem" \ + > $report + + # ADD SYSTEM SCORES + ( + for sys_eval in $outdir/*.confidence + do + cat $sys_eval \ + | grep "$measure" \ + | grep "fscore" \ + | awk 'BEGIN{OFS="\t"} {print $3,$4,$5,$6,$7,$8,$9}' \ + | tr '\n' '\t' + basename $sys_eval \ + | sed 's/\.confidence//' + done + ) \ + | sort -t$'\t' -k4 -nr \ + >> $report + +done diff --git a/scripts/run_tac13_report.sh b/scripts/run_tac13_report.sh index 6c581a0..abaea09 100755 --- a/scripts/run_tac13_report.sh +++ b/scripts/run_tac13_report.sh @@ -14,21 +14,32 @@ outdir=$1; shift # directory to which results are written # INITIALISE REPORT HEADER report=$outdir/00report.tab -echo -e "system\tKBP2010 micro-average\tB^3 Precision\tB^3 Recall\tB^3 F1" \ - > $report +( + echo -en "system" # run name + echo -en "\tKBP2010 micro-average" # overall linking score + echo -en "\tB^3 Precision\tB^3 Recall\tB^3 F1" # B^3 clustering scores + echo -e "\tB^3+ Precision\tB^3+ Recall\tB^3+ F1" # B^3+ clustering scores +) > $report # ADD SYSTEM SCORES -# TODO add B^3+ -for eval in $outdir/*.evaluation +for sys_eval in $outdir/*.evaluation do - basename $eval \ + basename $sys_eval \ | sed 's/\.evaluation//' \ | tr '\n' '\t' \ >> $report - cat $eval \ - 
| egrep '(strong_all_match|b_cubed)' \ - | cut -f5,6,7,8 \ + cat $sys_eval \ + | grep -P '\tstrong_all_match$' \ + | cut -f 7 \ | tr '\n' '\t' \ - | cut -f2,5,6,7 \ + >> $report + cat $sys_eval \ + | grep -P '\tb_cubed$' \ + | cut -f 5,6,7 \ + | tr '\n' '\t' \ + >> $report + cat $sys_eval \ + | grep -P '\tb_cubed_plus$' \ + | cut -f 5,6,7 \ >> $report done diff --git a/scripts/run_tac14_all.sh b/scripts/run_tac14_all.sh index d38d2a5..c5717a8 100755 --- a/scripts/run_tac14_all.sh +++ b/scripts/run_tac14_all.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Run TAC13 evaluation and analysis +# Run TAC14 evaluation and analysis set -e usage="Usage: $0 GOLD_XML GOLD_TAB SYSTEMS_DIR OUT_DIR" diff --git a/scripts/run_tac14_evaluation.sh b/scripts/run_tac14_evaluation.sh index 37939a9..8861c0c 100755 --- a/scripts/run_tac14_evaluation.sh +++ b/scripts/run_tac14_evaluation.sh @@ -36,3 +36,8 @@ ls $outdir/*.combined.tsv \ | grep -v "gold\.combined\.tsv$" \ | xargs -n 1 -P $jobs $SCR/run_evaluate.sh $gold + +# PREPARE SUMMARY REPORT +echo "INFO Preparing summary report.." 
+$SCR/run_tac14_report.sh $outdir + diff --git a/scripts/run_tac14_filtered.sh b/scripts/run_tac14_filtered.sh new file mode 100755 index 0000000..2363882 --- /dev/null +++ b/scripts/run_tac14_filtered.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# +# Run TAC14 filtered evaluation and analysis +set -e + +usage="Usage: $0 GOLD_XML GOLD_TAB SYSTEMS_DIR OUT_DIR" + +if [ "$#" -ne 4 ]; then + echo $usage + exit 1 +fi + +goldx=$1; shift # gold standard queries/mentions (XML) +goldt=$1; shift # gold standard link annotations (tab-separated) +sysdir=$1; shift # directory containing output from systems +outdir=$1; shift # directory to which results are written + +SCR=`dirname $0` + +JOBS=8 # number of jobs for parallel mode (set to number of CPUs if possible) + + +# CONFIGURE FILTERS +FILTERS=( + # NE type filters + "PER:::PER$" + "ORG:::ORG$" + "GPE:::GPE$" + # genre filters + "NW:::^(AFP|APW|CNA|LTW|NYT|WPB|XIN)_ENG_" + "WB:::^eng-(NG|WL)-" + "DF:::^bolt-eng-DF-" + # combined filters + "PER_NW:::^(AFP|APW|CNA|LTW|NYT|WPB|XIN)_ENG_.*PER$" + "PER_WB:::^eng-(NG|WL)-.*PER$" + "PER_DF:::^bolt-eng-DF-.*PER$" + "ORG_NW:::^(AFP|APW|CNA|LTW|NYT|WPB|XIN)_ENG_.*ORG$" + "ORG_WB:::^eng-(NG|WL)-.*ORG$" + "ORG_DF:::^bolt-eng-DF-.*ORG$" + "GPE_NW:::^(AFP|APW|CNA|LTW|NYT|WPB|XIN)_ENG_.*GPE$" + "GPE_WB:::^eng-(NG|WL)-.*GPE$" + "GPE_DF:::^bolt-eng-DF-.*GPE$" + ) + + +# RUN OVERALL EVALUATION +$SCR/run_tac14_evaluation.sh $goldx $goldt $sysdir $outdir $JOBS + + +# GET GOLD STANDARD PATH +gold=$outdir/gold.combined.tsv +if [ ! 
-e $gold ] +then + echo "ERROR $gold does not exist" + exit 1 +fi + + +# GET LIST OF SYSTEM OUTPUT PATHS +systems=(`ls $outdir/*.combined.tsv | grep -v "gold\.combined\.tsv$"`) +if [ ${#systems[*]} == 0 ] +then + echo "ERROR did not find any system output" + exit 1 +fi + + +# RUN FILTERED EVALUATION +for filter in ${FILTERS[@]} +do + subset=`echo $filter | sed 's/:::.*$//'` + regex=`echo $filter | sed 's/^.*::://'` + + # MAKE DIRECTORY FOR FILTERED EVALUATION + subdir=$outdir/00filtered/$subset + mkdir -p $subdir + + # FILTER AND EVALUATE + echo "INFO Evaluating on $subset subset.." + printf "%s\n" "${systems[@]}" \ + | xargs -n 1 -P $JOBS $SCR/run_filtrate.sh $subdir "$regex" $gold + + # PREPARE SUMMARY REPORT + echo "INFO Preparing summary report.." + $SCR/run_tac14_report.sh $subdir + +done diff --git a/scripts/run_tac14_report.sh b/scripts/run_tac14_report.sh new file mode 100755 index 0000000..c94248d --- /dev/null +++ b/scripts/run_tac14_report.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Prepare score summary in TAC 2014 format +set -e + +usage="Usage: $0 OUT_DIR" + +if [ "$#" -ne 1 ]; then + echo $usage + exit 1 +fi + +outdir=$1; shift # directory to which results are written + +# INITIALISE REPORT HEADER +report=$outdir/00report.tab +echo -e "WikiF1\tCEAFeP\tCEAFeR\tCEAFeF1\tSystem" \ + > $report + +# ADD SYSTEM SCORES +for eval in $outdir/*.evaluation +do + cat $eval \ + | grep 'strong_typed_all_match' \ + | cut -f7 \ + | tr '\n' '\t' \ + >> $report + cat $eval \ + | grep 'entity_ceaf' \ + | cut -f5,6,7 \ + | tr '\n' '\t' \ + >> $report + basename $eval \ + | sed 's/\.evaluation//' \ + >> $report +done diff --git a/scripts/test_tac13_evaluation.sh b/scripts/test_tac13_evaluation.sh index 7cc72c8..a488560 100755 --- a/scripts/test_tac13_evaluation.sh +++ b/scripts/test_tac13_evaluation.sh @@ -38,12 +38,10 @@ official=$outdir/00official.tab cat $scores \ | egrep -v '^[0-9]* queries' \ | head -1 \ - | cut -f1,2,3,4,5 \ > $official cat $scores \ | egrep -v 
'^[0-9]* queries' \ - | awk '{if (NR>1) print}' \ - | cut -f1,2,3,4,5 \ + | tail -n +2 \ | sort \ >> $official @@ -54,7 +52,7 @@ if [ "" != "`diff $official $report`" ] then difff=$outdir/00diff.txt diff -y $official $report \ - > $difff + > $difff echo "FAIL see $difff" else echo "PASS"