-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Template merge] move some generate tests to core
- Loading branch information
Showing
6 changed files
with
149 additions
and
578 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
177 changes: 36 additions & 141 deletions
177
src/fst/morphology/test/generate-adjective-lemmas.sh.in
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,158 +1,53 @@ | ||
#!/bin/bash | ||
# @configure_input@ | ||
|
||
# Automake interprets the exit status as follows: | ||
# - an exit status of 0 will denote a success | ||
# - an exit status of 77 a skipped test | ||
# - an exit status of 99 a hard error | ||
# - any other exit status will denote a failure. | ||
|
||
# To run this test script only, do: | ||
# | ||
# make check TESTS=generate-noun-lemmas.sh | ||
# make check TESTS=generate-adjective-lemmas.sh | ||
|
||
# ensure this is proper run from make check or otherwise set up | ||
if test -z "$srcdir" ; then | ||
echo needs to run from make check or set srcdir=. | ||
exit 1 | ||
fi | ||
genscript=$GIELLA_CORE/scripts/generate-lemmas.sh | ||
if ! test -x "$genscript" ; then | ||
echo "missing generator script $genscript" | ||
exit 1 | ||
fi | ||
if test -z "$TOOLKITS" ; then | ||
TOOLKITS=hfst | ||
fi | ||
|
||
# This test script will test that all noun lemmas do generate as themselves. | ||
# Extend as needed, and copy to new files to adapt to other parts of speech. | ||
# The changes usually needed are: | ||
# | ||
# 1. change the reference to the source file (line 23) | ||
# 2. extend the extract lemmas egrep expression (lines 53 ff) | ||
# 3. adapt the tag addition and lemma generation instructions (lines 79 ff) | ||
# 1. change the reference to the source file (line XX) | ||
# 2. extend the extract lemmas egrep expression (lines YY) | ||
# 3. adapt the tag addition and lemma generation instructions (lines ZZ) | ||
|
||
###### Variables: ####### | ||
POS=adjectives | ||
### in ### | ||
source_file=${srcdir}/../stems/${POS}.lexc | ||
generator_file=./../../generator-gt-desc | ||
analyser_file=./../../analyser-gt-desc | ||
|
||
### out ### | ||
# Temporary files: | ||
lemmas=./filtered-${POS}.txt | ||
# Result files, will get filename suffix programmatically further down: | ||
generated_lemmas=./generated-${POS} | ||
result_file=missing_${POS}_lemmas | ||
gen_result_file=generated_missing_${POS}_lemmas | ||
ana_result_file=analysed_missing_${POS}_lemmas | ||
|
||
# SKIP if source file does not exist (works with both single and | ||
# multiple files): | ||
if [ ! `ls -1 $source_file 2>/dev/null | wc -l ` -gt 0 ]; then | ||
echo | ||
echo "*** Warning: Source file(s) $source_file not found." | ||
echo | ||
exit 77 | ||
fi | ||
|
||
# Use autotools mechanisms to only run the configured fst types in the tests: | ||
fsttype= | ||
@CAN_HFST_TRUE@fsttype="$fsttype hfst" | ||
@CAN_XFST_TRUE@fsttype="$fsttype xfst" | ||
|
||
# Exit if both hfst and xerox have been shut off: | ||
if test -z "$fsttype" ; then | ||
echo "All transducer types have been shut off at configure time." | ||
echo "Nothing to test. Skipping." | ||
exit 77 | ||
fi | ||
|
||
# Get external Mac editor for viewing failed results from configure: | ||
EXTEDITOR=@SEE@ | ||
|
||
##### Extract lemmas - add additional egrep pattern as parameters: ##### | ||
##### --include "(pattern1|pattern2|...)" ##### | ||
##### --exclude "(pattern1|pattern2|...)" ##### | ||
@GTCORE@/scripts/extract-lemmas.sh \ | ||
$source_file > $lemmas | ||
|
||
###### Start testing: ####### | ||
transducer_found=0 | ||
Fail=0 | ||
|
||
# The script tests both Xerox and Hfst transducers if available: | ||
for f in $fsttype; do | ||
if [ $f == "xfst" ]; then | ||
lookup_tool="@LOOKUP@ -flags mbTT" | ||
suffix="xfst" | ||
# Does lookup support -q / quiet mode? | ||
lookup_quiet=$($lookup_tool -q 2>&1 | grep USAGES) | ||
if ! [[ $lookup_quiet == *"USAGES"* ]] ; then | ||
# it does support quiet mode, add the -q flag: | ||
lookup_tool="@LOOKUP@ -q -flags mbTT" | ||
fi | ||
elif [ $f == "hfst" ]; then | ||
lookup_tool="@HFST_OPTIMIZED_LOOKUP@ -q" | ||
suffix="hfstol" | ||
else | ||
Fail=1 | ||
printf "ERROR: Unknown fst type! " | ||
echo "$f - FAIL" | ||
continue | ||
fi | ||
if [ -f "$generator_file.$suffix" ]; then | ||
let "transducer_found += 1" | ||
|
||
###### Test lemma generation: ####### | ||
# generate lemmas in singular, extract the resulting generated lemma, | ||
# store it: | ||
sed 's/$/+A+Sg+Nom/' $lemmas | $lookup_tool $generator_file.$suffix \ | ||
| fgrep -v "+?" | grep -v "^$" | cut -f2 | sort -u \ | ||
> $generated_lemmas.$f.txt | ||
# Generate adjectives, extract those that do not generate in singular nominative, | ||
# generate the rest in singular genitive: | ||
sed 's/$/+A+Sg+Nom/' $lemmas | $lookup_tool $generator_file.$suffix \ | ||
| fgrep "+?" | cut -d "+" -f1 | sed 's/$/+A+Sg+Gen/' \ | ||
| $lookup_tool $generator_file.$suffix | cut -f2 \ | ||
| grep -v "^$" >> $generated_lemmas.$f.txt | ||
|
||
# Add more variants as needed, e.g. comparative, superlative only adjs. | ||
|
||
###### Collect results, and generate debug info if FAIL: ####### | ||
# Sort and compare original input with resulting output - the diff is | ||
# used to generate lemmas which are opened in SEE: | ||
sort -u -o $generated_lemmas.$f.txt $generated_lemmas.$f.txt | ||
comm -23 $lemmas $generated_lemmas.$f.txt > $result_file.$f.txt | ||
|
||
# Open the diff file in SubEthaEdit (if there is a diff): | ||
if [ -s $result_file.$f.txt ]; then | ||
grep -v '^$' $result_file.$f.txt \ | ||
| sed 's/$/+A+Sg+Nom/' \ | ||
| $lookup_tool $generator_file.$suffix \ | ||
> $gen_result_file.$f.txt | ||
# If we have an analyser, analyse the missing lemmas as well: | ||
if test -e $analyser_file.$suffix ; then | ||
grep -v '^$' $result_file.$f.txt \ | ||
| $lookup_tool $analyser_file.$suffix \ | ||
> $ana_result_file.$f.txt | ||
fi | ||
# Only open the failed lemmas in see if @SEE@ is defined: | ||
if [ "$EXTEDITOR" ]; then | ||
$EXTEDITOR $result_file.$f.txt | ||
$EXTEDITOR $gen_result_file.$f.txt | ||
$EXTEDITOR $ana_result_file.$f.txt | ||
else | ||
echo "There were problem lemmas. Details in:" | ||
echo "* $result_file.$f.txt " | ||
echo "* $gen_result_file.$f.txt" | ||
echo "* $ana_result_file.$f.txt" | ||
fi | ||
Fail=1 | ||
echo "$f - FAIL" | ||
continue | ||
fi | ||
echo "$f - PASS" | ||
source_file=@abs_srcdir@/../stems/${POS}.lexc | ||
generator_file=@abs_builddir@/../../generator-gt-norm | ||
analyser_file=@abs_builddir@/../../analyser-gt-norm | ||
tags="+A+Sg+Nom" | ||
xtags="+A+Sg+Gen" | ||
inclusions= | ||
exclusions= | ||
|
||
for tk in $TOOLKITS ; do | ||
"$genscript" "$POS" "$source_file" "$generator_file" "$analyser_file" \ | ||
"$tk" "$tags" "$xtags" "$inclusions" "$exclusions" | ||
rv=$? | ||
if test $rv = 77 ; then | ||
echo skipped | ||
exit 77 | ||
elif test $rv -gt 0 ; then | ||
echo failed | ||
exit 1 | ||
fi | ||
done | ||
|
||
# At least one of the Xerox or HFST tests failed: | ||
if [ "$Fail" = "1" ]; then | ||
exit 1 | ||
fi | ||
|
||
if [ $transducer_found -eq 0 ]; then | ||
echo ERROR: No transducer found $analyser_file $generator_file | ||
exit 77 | ||
fi | ||
|
||
# When done, remove the generated data file: | ||
rm -f $lemmas |
Oops, something went wrong.