From 76a4afeb46e63599fda899de7314cc1ffa8972f7 Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Mon, 17 Jun 2024 01:26:21 +0200 Subject: [PATCH] [Template merge] move some generate tests to core --- m4/giella-macros.m4 | 2 +- src/fst/morphology/test/Makefile.am | 23 +-- src/fst/morphology/test/Makefile.am.orig | 1 + .../test/generate-adjective-lemmas.sh.in | 172 ++++------------- .../test/generate-noun-lemmas.sh.in | 173 ++++------------- .../test/generate-propernoun-lemmas.sh.in | 181 ++++-------------- .../test/generate-verb-lemmas.sh.in | 171 ++++------------- 7 files changed, 150 insertions(+), 573 deletions(-) diff --git a/m4/giella-macros.m4 b/m4/giella-macros.m4 index 110b0ac6..8a2873a2 100644 --- a/m4/giella-macros.m4 +++ b/m4/giella-macros.m4 @@ -88,7 +88,7 @@ AC_MSG_RESULT([$GIELLA_CORE]) ############################################################### ### This is the version of the Giella Core that we require. ### ### UPDATE AS NEEDED. -_giella_core_min_version=1.0.0 +_giella_core_min_version=1.0.1 # GIELLA_CORE/GTCORE env. 
variable, required by the infrastructure to find scripts: AC_ARG_VAR([GIELLA_CORE], [directory for the Giella infra core scripts and other required resources]) diff --git a/src/fst/morphology/test/Makefile.am b/src/fst/morphology/test/Makefile.am index 3ad80238..5131a7e2 100644 --- a/src/fst/morphology/test/Makefile.am +++ b/src/fst/morphology/test/Makefile.am @@ -4,24 +4,15 @@ SUBDIRS=phonology -TESTS_ENVIRONMENT = export GIELLA_CORE=$(GIELLA_CORE); - -######## Test targets: ########### +######## Local test targets: ########### # List here (space separated) all test scripts that should be run # unconditionally: -TESTS=tag_test.sh +TESTS= # Add your shell scripts for running tests requiring only a generator: -GENERATION_TESTS_IN=generate-adjective-lemmas.sh.in \ - generate-noun-lemmas.sh.in \ - generate-propernoun-lemmas.sh.in \ - generate-verb-lemmas.sh.in - -GENERATION_TESTS=generate-adjective-lemmas.sh \ - generate-noun-lemmas.sh \ - generate-propernoun-lemmas.sh \ - generate-verb-lemmas.sh +GENERATION_TESTS_IN= +GENERATION_TESTS= if WANT_GENERATION TESTS+=$(GENERATION_TESTS) @@ -35,10 +26,6 @@ XFAIL_TESTS=generate-adjective-lemmas.sh \ generate-verb-lemmas.sh -# Make sure all tests scripts are included in the distributed tarballs: -EXTRA_DIST=$(TESTS) $(GENERATION_TESTS_IN) -####### Other targets: ########### -clean-local: - -rm -f *.txt include $(top_srcdir)/../giella-core/am-shared/devtest-include.am +include $(top_srcdir)/../giella-core/am-shared/src-fst-morphology-test-include.am diff --git a/src/fst/morphology/test/Makefile.am.orig b/src/fst/morphology/test/Makefile.am.orig index 625df3a2..3ad80238 100644 --- a/src/fst/morphology/test/Makefile.am.orig +++ b/src/fst/morphology/test/Makefile.am.orig @@ -41,3 +41,4 @@ EXTRA_DIST=$(TESTS) $(GENERATION_TESTS_IN) ####### Other targets: ########### clean-local: -rm -f *.txt +include $(top_srcdir)/../giella-core/am-shared/devtest-include.am diff --git a/src/fst/morphology/test/generate-adjective-lemmas.sh.in 
b/src/fst/morphology/test/generate-adjective-lemmas.sh.in index fbfbf8b0..6687c5e3 100644 --- a/src/fst/morphology/test/generate-adjective-lemmas.sh.in +++ b/src/fst/morphology/test/generate-adjective-lemmas.sh.in @@ -1,152 +1,52 @@ #!/bin/bash # @configure_input@ -# Automake interprets the exit status as follows: -# - an exit status of 0 will denote a success -# - an exit status of 77 a skipped test -# - an exit status of 99 a hard error -# - any other exit status will denote a failure. - # To run this test script only, do: # -# make check TESTS=generate-noun-lemmas.sh +# make check TESTS=generate-adjective-lemmas.sh + +# ensure this is properly run from make check or otherwise set up +if test -z "$srcdir" ; then + echo needs to run from make check or set srcdir=. + exit 1 +fi +genscript=$GIELLA_CORE/scripts/generate-lemmas.sh +if ! test -x "$genscript" ; then + echo "missing generator script $genscript" + exit 1 +fi +if test -z "$TOOLKITS" ; then + TOOLKITS=hfst +fi # This test script will test that all noun lemmas do generate as themselves. # Extend as needed, and copy to new files to adapt to other parts of speech. # The changes usually needed are: # -# 1. change the reference to the source file (line 23) -# 2. extend the extract lemmas egrep expression (lines 53 ff) -# 3. adapt the tag addition and lemma generation instructions (lines 79 ff) +# 1. change the reference to the source file (line XX) +# 2. extend the extract lemmas egrep expression (lines YY) +# 3. 
adapt the tag addition and lemma generation instructions (lines ZZ) ###### Variables: ####### POS=adjectives ### in ### -source_file=${srcdir}/../stems/${POS}.lexc -generator_file=./../../generator-gt-norm -analyser_file=./../../analyser-gt-norm - -### out ### -# Temporary files: -lemmas=./filtered-${POS}.txt -# Result files, will get filename suffix programmatically further down: -generated_lemmas=./generated-${POS} -result_file=missing_${POS}_lemmas -gen_result_file=generated_missing_${POS}_lemmas -ana_result_file=analysed_missing_${POS}_lemmas - -# SKIP if source file does not exist (works with both single and -# multiple files): -if [ ! `ls -1 $source_file 2>/dev/null | wc -l ` -gt 0 ]; then - echo - echo "*** Warning: Source file(s) $source_file not found." - echo - exit 77 -fi - -# Use autotools mechanisms to only run the configured fst types in the tests: -fsttype= -@CAN_HFST_TRUE@fsttype="$fsttype hfst" -@CAN_XFST_TRUE@fsttype="$fsttype xfst" - -# Exit if both hfst and xerox have been shut off: -if test -z "$fsttype" ; then - echo "All transducer types have been shut off at configure time." - echo "Nothing to test. Skipping." - exit 77 -fi - -# Get external Mac editor for viewing failed results from configure: -EXTEDITOR=@SEE@ - -##### Extract lemmas - add additional egrep pattern as parameters: ##### -##### --include "(pattern1|pattern2|...)" ##### -##### --exclude "(pattern1|pattern2|...)" ##### -@GTCORE@/scripts/extract-lemmas.sh \ - $source_file > $lemmas - -###### Start testing: ####### -transducer_found=0 -Fail=0 - -# The script tests both Xerox and Hfst transducers if available: -for f in $fsttype; do - if [ $f == "xfst" ]; then - lookup_tool="@LOOKUP@ -flags mbTT" - suffix="xfst" - # Does lookup support -q / quiet mode? - lookup_quiet=$($lookup_tool -q 2>&1 | grep USAGES) - if ! 
[[ $lookup_quiet == *"USAGES"* ]] ; then - # it does support quiet mode, add the -q flag: - lookup_tool="@LOOKUP@ -q -flags mbTT" - fi - elif [ $f == "hfst" ]; then - lookup_tool="@HFST_OPTIMIZED_LOOKUP@ -q" - suffix="hfstol" - else - Fail=1 - printf "ERROR: Unknown fst type! " - echo "$f - FAIL" - continue - fi - if [ -f "$generator_file.$suffix" ]; then - let "transducer_found += 1" - -###### Test lemma generation: ####### - # generate lemmas in singular, extract the resulting generated lemma, - # store it: - sed 's/$/+A+Sg+Nom/' $lemmas | $lookup_tool $generator_file.$suffix \ - | fgrep -v "+?" | grep -v "^$" | cut -f2 | sort -u \ - > $generated_lemmas.$f.txt - - # Add more variants as needed, e.g. comparative, superlative only adjs. - -###### Collect results, and generate debug info if FAIL: ####### - # Sort and compare original input with resulting output - the diff is - # used to generate lemmas which are opened in SEE: - sort -u -o $generated_lemmas.$f.txt $generated_lemmas.$f.txt - comm -23 $lemmas $generated_lemmas.$f.txt > $result_file.$f.txt - - # Open the diff file in SubEthaEdit (if there is a diff): - if [ -s $result_file.$f.txt ]; then - grep -v '^$' $result_file.$f.txt \ - | sed 's/$/+A+Sg+Nom/' \ - | $lookup_tool $generator_file.$suffix \ - > $gen_result_file.$f.txt - # If we have an analyser, analyse the missing lemmas as well: - if test -e $analyser_file.$suffix ; then - grep -v '^$' $result_file.$f.txt \ - | $lookup_tool $analyser_file.$suffix \ - > $ana_result_file.$f.txt - fi - # Only open the failed lemmas in see if @SEE@ is defined: - if [ "$EXTEDITOR" ]; then - $EXTEDITOR $result_file.$f.txt - $EXTEDITOR $gen_result_file.$f.txt - $EXTEDITOR $ana_result_file.$f.txt - else - echo "There were problem lemmas. 
Details in:" - echo "* $result_file.$f.txt " - echo "* $gen_result_file.$f.txt" - echo "* $ana_result_file.$f.txt" - fi - Fail=1 - echo "$f - FAIL" - continue - fi - echo "$f - PASS" +source_file=@abs_srcdir@/../stems/${POS}.lexc +generator_file=@abs_builddir@/../../generator-gt-norm +analyser_file=@abs_builddir@/../../analyser-gt-norm +tags="+A+Sg+Nom" +xtags= +inclusions= +exclusions= + +for tk in $TOOLKITS ; do + "$genscript" "$POS" "$source_file" "$generator_file" "$analyser_file" \ + "$tk" "$tags" "$xtags" "$inclusions" "$exclusions" + rv=$? + if test $rv = 77 ; then + echo skipped + exit 77 + elif test $rv -gt 0 ; then + echo failed + exit 1 fi done - -# At least one of the Xerox or HFST tests failed: -if [ "$Fail" = "1" ]; then - exit 1 -fi - -if [ $transducer_found -eq 0 ]; then - echo ERROR: No transducer found $analyser_file $generator_file - exit 77 -fi - -# When done, remove the generated data file: -rm -f $lemmas diff --git a/src/fst/morphology/test/generate-noun-lemmas.sh.in b/src/fst/morphology/test/generate-noun-lemmas.sh.in index 2154b240..b26426c0 100755 --- a/src/fst/morphology/test/generate-noun-lemmas.sh.in +++ b/src/fst/morphology/test/generate-noun-lemmas.sh.in @@ -1,156 +1,51 @@ #!/bin/bash # @configure_input@ -# Automake interprets the exit status as follows: -# - an exit status of 0 will denote a success -# - an exit status of 77 a skipped test -# - an exit status of 99 a hard error -# - any other exit status will denote a failure. - # To run this test script only, do: # # make check TESTS=generate-noun-lemmas.sh +if test -z "$srcdir" ; then + echo needs to run from make check or set srcdir=. + exit 1 +fi +genscript=$GIELLA_CORE/scripts/generate-lemmas.sh +if ! test -x $genscript ; then + echo "missing generator script $genscript" + exit 1 +fi +if test -z "$TOOLKITS" ; then + TOOLKITS=hfst +fi + # This test script will test that all noun lemmas do generate as themselves. 
# Extend as needed, and copy to new files to adapt to other parts of speech. # The changes usually needed are: # -# 1. change the reference to the source file (line 23) -# 2. extend the extract lemmas egrep expression (lines 53 ff) -# 3. adapt the tag addition and lemma generation instructions (lines 79 ff) +# 1. change the reference to the source file (line 32) +# 2. extend the extract lemmas egrep expression (lines 35–36) +# 3. adapt the tag addition and lemma generation instructions (lines 37–38) ###### Variables: ####### POS=nouns ### in ### -source_file=${srcdir}/../stems/${POS}.lexc -generator_file=./../../generator-gt-norm -analyser_file=./../../analyser-gt-norm - -### out ### -# Temporary files: -lemmas=./filtered-${POS}.txt -# Result files, will get filename suffix programmatically further down: -generated_lemmas=./generated-${POS} -result_file=missing_${POS}_lemmas -gen_result_file=generated_missing_${POS}_lemmas -ana_result_file=analysed_missing_${POS}_lemmas - -# SKIP if source file does not exist (works with both single and -# multiple files): -if [ ! `ls -1 $source_file 2>/dev/null | wc -l ` -gt 0 ]; then - echo - echo "*** Warning: Source file(s) $source_file not found." - echo - exit 77 -fi - -# Use autotools mechanisms to only run the configured fst types in the tests: -fsttype= -@CAN_HFST_TRUE@fsttype="$fsttype hfst" -@CAN_XFST_TRUE@fsttype="$fsttype xfst" - -# Exit if both hfst and xerox have been shut off: -if test -z "$fsttype" ; then - echo "All transducer types have been shut off at configure time." - echo "Nothing to test. Skipping." 
- exit 77 -fi - -# Get external Mac editor for viewing failed results from configure: -EXTEDITOR=@SEE@ - -##### Extract lemmas - add additional egrep pattern as parameters: ##### -##### --include "(pattern1|pattern2|...)" ##### -##### --exclude "(pattern1|pattern2|...)" ##### -@GTCORE@/scripts/extract-lemmas.sh \ - $source_file > $lemmas - -###### Start testing: ####### -transducer_found=0 -Fail=0 - -# The script tests both Xerox and Hfst transducers if available: -for f in $fsttype; do - if [ $f == "xfst" ]; then - lookup_tool="@LOOKUP@ -flags mbTT" - suffix="xfst" - # Does lookup support -q / quiet mode? - lookup_quiet=$($lookup_tool -q 2>&1 | grep USAGES) - if ! [[ $lookup_quiet == *"USAGES"* ]] ; then - # it does support quiet mode, add the -q flag: - lookup_tool="@LOOKUP@ -q -flags mbTT" - fi - elif [ $f == "hfst" ]; then - lookup_tool="@HFST_OPTIMIZED_LOOKUP@ -q" - suffix="hfstol" - else - Fail=1 - printf "ERROR: Unknown fst type! " - echo "$f - FAIL" - continue - fi - if [ -f "$generator_file.$suffix" ]; then - let "transducer_found += 1" - -###### Test non-comopunds: ####### - # generate nouns in Singular, extract the resulting generated lemma, - # store it: - sed 's/$/+N+Sg+Nom/' $lemmas | $lookup_tool $generator_file.$suffix \ - | fgrep -v "+?" | grep -v "^$" | cut -f2 | sort -u \ - > $generated_lemmas.$f.txt - # Generate nouns, extract those that do not generate in singular, - # generate the rest in plural: - sed 's/$/+N+Sg+Nom/' $lemmas | $lookup_tool $generator_file.$suffix \ - | fgrep "+?" 
| cut -d "+" -f1 | sed 's/$/+N+Pl+Nom/' \ - | $lookup_tool $generator_file.$suffix | cut -f2 \ - | grep -v "^$" >> $generated_lemmas.$f.txt - -###### Collect results, and generate debug info if FAIL: ####### - # Sort and compare original input with resulting output - the diff is - # used to generate lemmas which are opened in SEE: - sort -u -o $generated_lemmas.$f.txt $generated_lemmas.$f.txt - comm -23 $lemmas $generated_lemmas.$f.txt > $result_file.$f.txt - - # Open the diff file in SubEthaEdit (if there is a diff): - if [ -s $result_file.$f.txt ]; then - grep -v '^$' $result_file.$f.txt \ - | sed 's/$/+N+Sg+Nom/' \ - | $lookup_tool $generator_file.$suffix \ - > $gen_result_file.$f.txt - # If we have an analyser, analyse the missing lemmas as well: - if test -e $analyser_file.$suffix ; then - grep -v '^$' $result_file.$f.txt \ - | $lookup_tool $analyser_file.$suffix \ - > $ana_result_file.$f.txt - fi - # Only open the failed lemmas in see if @SEE@ is defined: - if [ "$EXTEDITOR" ]; then - $EXTEDITOR $result_file.$f.txt - $EXTEDITOR $gen_result_file.$f.txt - $EXTEDITOR $ana_result_file.$f.txt - else - echo "There were problem lemmas. Details in:" - echo "* $result_file.$f.txt " - echo "* $gen_result_file.$f.txt" - echo "* $ana_result_file.$f.txt" - fi - Fail=1 - echo "$f - FAIL" - continue - fi - echo "$f - PASS" +source_file=@abs_srcdir@/../stems/${POS}.lexc +generator_file=@abs_builddir@/../../generator-gt-norm +analyser_file=@abs_builddir@/../../analyser-gt-norm +tags="+N+Sg+Nom" +xtags="+N+Pl+Nom" +inclusions= +exclusions= + +for tk in $TOOLKITS ; do + "$genscript" "$POS" "$source_file" "$generator_file" "$analyser_file"\ + "$tk" "$tags" "$xtags" "$inclusions" "$exclusions" + rv=$? 
+ if test $rv = 77 ; then + echo skipped + exit 77 + elif test $rv -gt 0 ; then + echo failed + exit 1 fi done - -# At least one of the Xerox or HFST tests failed: -if [ "$Fail" = "1" ]; then - exit 1 -fi - -if [ $transducer_found -eq 0 ]; then - echo ERROR: No transducer found $analyser_file $generator_file - exit 77 -fi - -# When done, remove the generated data file: -rm -f $lemmas diff --git a/src/fst/morphology/test/generate-propernoun-lemmas.sh.in b/src/fst/morphology/test/generate-propernoun-lemmas.sh.in index ba8be712..655d9332 100644 --- a/src/fst/morphology/test/generate-propernoun-lemmas.sh.in +++ b/src/fst/morphology/test/generate-propernoun-lemmas.sh.in @@ -1,161 +1,52 @@ #!/bin/bash # @configure_input@ -# Automake interprets the exit status as follows: -# - an exit status of 0 will denote a success -# - an exit status of 77 a skipped test -# - an exit status of 99 a hard error -# - any other exit status will denote a failure. - # To run this test script only, do: # -# make check TESTS=generate-noun-lemmas.sh +# make check TESTS=generate-propernoun-lemmas.sh + +# ensure this is properly run from make check or otherwise set up +if test -z "$srcdir" ; then + echo needs to run from make check or set srcdir=. + exit 1 +fi +genscript=$GIELLA_CORE/scripts/generate-lemmas.sh +if ! test -x "$genscript" ; then + echo "missing generator script $genscript" + exit 1 +fi +if test -z "$TOOLKITS" ; then + TOOLKITS=hfst +fi # This test script will test that all noun lemmas do generate as themselves. # Extend as needed, and copy to new files to adapt to other parts of speech. # The changes usually needed are: # -# 1. change the reference to the source file (line 23) -# 2. extend the extract lemmas egrep expression (lines 53 ff) -# 3. adapt the tag addition and lemma generation instructions (lines 79 ff) +# 1. change the reference to the source file (line xx) +# 2. extend the extract lemmas egrep expression (lines yy) +# 3. 
adapt the tag addition and lemma generation instructions (lines zz) ###### Variables: ####### POS=propernouns ### in ### -source_file=${srcdir}/../stems/${POS}.lexc -generator_file=./../../generator-gt-norm -analyser_file=./../../analyser-gt-norm - -### out ### -# Temporary files: -lemmas=./filtered-${POS}.txt -# Result files, will get filename suffix programmatically further down: -generated_lemmas=./generated-${POS} -result_file=missing_${POS}_lemmas -gen_result_file=generated_missing_${POS}_lemmas -ana_result_file=analysed_missing_${POS}_lemmas - -# SKIP if source file does not exist (works with both single and -# multiple files): -if [ ! `ls -1 $source_file 2>/dev/null | wc -l ` -gt 0 ]; then - echo - echo "*** Warning: Source file(s) $source_file not found." - echo - exit 77 -fi - -# Use autotools mechanisms to only run the configured fst types in the tests: -fsttype= -@CAN_HFST_TRUE@fsttype="$fsttype hfst" -@CAN_XFST_TRUE@fsttype="$fsttype xfst" - -# Exit if both hfst and xerox have been shut off: -if test -z "$fsttype" ; then - echo "All transducer types have been shut off at configure time." - echo "Nothing to test. Skipping." - exit 77 -fi - -# Get external Mac editor for viewing failed results from configure: -EXTEDITOR=@SEE@ - -##### Extract lemmas - add additional egrep pattern as parameters: ##### -##### --include "(pattern1|pattern2|...)" ##### -##### --exclude "(pattern1|pattern2|...)" ##### -@GTCORE@/scripts/extract-lemmas.sh \ - $source_file > $lemmas - -###### Start testing: ####### -transducer_found=0 -Fail=0 - -# The script tests both Xerox and Hfst transducers if available: -for f in $fsttype; do - if [ $f == "xfst" ]; then - lookup_tool="@LOOKUP@ -flags mbTT" - suffix="xfst" - # Does lookup support -q / quiet mode? - lookup_quiet=$($lookup_tool -q 2>&1 | grep USAGES) - if ! 
[[ $lookup_quiet == *"USAGES"* ]] ; then - # it does support quiet mode, add the -q flag: - lookup_tool="@LOOKUP@ -q -flags mbTT" - fi - elif [ $f == "hfst" ]; then - lookup_tool="@HFST_OPTIMIZED_LOOKUP@ -q" - suffix="hfstol" - else - Fail=1 - printf "ERROR: Unknown fst type! " - echo "$f - FAIL" - continue - fi - if [ -f "$generator_file.$suffix" ]; then - let "transducer_found += 1" - -###### Test non-comopunds: ####### - # generate propers in singular, extract the resulting generated lemma, - # store it: - sed 's/$/+N+Prop+Sg+Nom/' $lemmas \ - | $lookup_tool $generator_file.$suffix \ - | fgrep -v "+?" | grep -v "^$" | cut -f2 \ - > $generated_lemmas.$f.txt - - # Generate nouns, extract those that do not generate in singular, - # and try to generate them in plural: - sed 's/$/+N+Prop+Sg+Nom/' $lemmas \ - | $lookup_tool $generator_file.$suffix \ - | fgrep "+?" | cut -d "+" -f1 \ - | sed 's/$/+N+Prop+Pl+Nom/' \ - | $lookup_tool $generator_file.$suffix \ - | fgrep -v "+?" | grep -v "^$" | cut -f2 \ - >> $generated_lemmas.$f.txt - -###### Collect results, and generate debug info if FAIL: ####### - # Sort and compare original input with resulting output - the diff is - # used to generate lemmas which are opened in SEE: - sort -u -o $generated_lemmas.$f.txt $generated_lemmas.$f.txt - comm -23 $lemmas $generated_lemmas.$f.txt > $result_file.$f.txt - - # Open the diff file in SubEthaEdit (if there is a diff): - if [ -s $result_file.$f.txt ]; then - grep -v '^$' $result_file.$f.txt \ - | sed 's/$/+N+Prop+Sg+Nom/' \ - | $lookup_tool $generator_file.$suffix \ - > $gen_result_file.$f.txt - # If we have an analyser, analyse the missing lemmas as well: - if test -e $analyser_file.$suffix ; then - grep -v '^$' $result_file.$f.txt \ - | $lookup_tool $analyser_file.$suffix \ - > $ana_result_file.$f.txt - fi - # Only open the failed lemmas in see if @SEE@ is defined: - if [ "$EXTEDITOR" ]; then - $EXTEDITOR $result_file.$f.txt - $EXTEDITOR $gen_result_file.$f.txt - $EXTEDITOR 
$ana_result_file.$f.txt - else - echo "There were problem lemmas. Details in:" - echo "* $result_file.$f.txt " - echo "* $gen_result_file.$f.txt" - echo "* $ana_result_file.$f.txt" - fi - Fail=1 - echo "$f - FAIL" - continue - fi - echo "$f - PASS" +source_file=@abs_srcdir@/../stems/${POS}.lexc +generator_file=@abs_builddir@/../../generator-gt-norm +analyser_file=@abs_builddir@/../../analyser-gt-norm +tags="+N+Prop+Sg+Nom" +xtags= +inclusions= +exclusions= + +for tk in $TOOLKITS ; do + "$genscript" "$POS" "$source_file" "$generator_file" "$analyser_file" \ + "$tk" "$tags" "$xtags" "$inclusions" "$exclusions" + rv=$? + if test $rv = 77 ; then + echo skipped + exit 77 + elif test $rv -gt 0 ; then + echo failed + exit 1 fi done - -# At least one of the Xerox or HFST tests failed: -if [ "$Fail" = "1" ]; then - exit 1 -fi - -if [ $transducer_found -eq 0 ]; then - echo ERROR: No transducer found $analyser_file $generator_file - exit 77 -fi - -# When done, remove the generated data file: -rm -f $lemmas diff --git a/src/fst/morphology/test/generate-verb-lemmas.sh.in b/src/fst/morphology/test/generate-verb-lemmas.sh.in index e11989d5..d0afb195 100644 --- a/src/fst/morphology/test/generate-verb-lemmas.sh.in +++ b/src/fst/morphology/test/generate-verb-lemmas.sh.in @@ -1,150 +1,53 @@ #!/bin/bash # @configure_input@ -# Automake interprets the exit status as follows: -# - an exit status of 0 will denote a success -# - an exit status of 77 a skipped test -# - an exit status of 99 a hard error -# - any other exit status will denote a failure. - # To run this test script only, do: # -# make check TESTS=generate-noun-lemmas.sh +# make check TESTS=generate-verb-lemmas.sh + +# ensure this is properly run from make check or otherwise set up +if test -z "$srcdir" ; then + echo needs to run from make check or set srcdir=. + exit 1 +fi +genscript=$GIELLA_CORE/scripts/generate-lemmas.sh +if ! 
test -x "$genscript" ; then + echo "missing generator script $genscript" + exit 1 +fi +if test -z "$TOOLKITS" ; then + TOOLKITS=hfst +fi # This test script will test that all noun lemmas do generate as themselves. # Extend as needed, and copy to new files to adapt to other parts of speech. # The changes usually needed are: # -# 1. change the reference to the source file (line 23) -# 2. extend the extract lemmas egrep expression (lines 53 ff) -# 3. adapt the tag addition and lemma generation instructions (lines 79 ff) +# 1. change the reference to the source file (line 33) +# 2. extend the extract lemmas egrep expression (lines 36–37) +# 3. adapt the tag addition and lemma generation instructions (lines 38–39) ###### Variables: ####### POS=verbs ### in ### -source_file=${srcdir}/../stems/${POS}.lexc -generator_file=./../../generator-gt-norm -analyser_file=./../../analyser-gt-norm - -### out ### -# Temporary files: -lemmas=./filtered-${POS}.txt -# Result files, will get filename suffix programmatically further down: -generated_lemmas=./generated-${POS} -result_file=missing_${POS}_lemmas -gen_result_file=generated_missing_${POS}_lemmas -ana_result_file=analysed_missing_${POS}_lemmas - -# SKIP if source file does not exist (works with both single and -# multiple files): -if [ ! `ls -1 $source_file 2>/dev/null | wc -l ` -gt 0 ]; then - echo - echo "*** Warning: Source file(s) $source_file not found." - echo - exit 77 -fi - -# Use autotools mechanisms to only run the configured fst types in the tests: -fsttype= -@CAN_HFST_TRUE@fsttype="$fsttype hfst" -@CAN_XFST_TRUE@fsttype="$fsttype xfst" - -# Exit if both hfst and xerox have been shut off: -if test -z "$fsttype" ; then - echo "All transducer types have been shut off at configure time." - echo "Nothing to test. Skipping." 
- exit 77 -fi - -# Get external Mac editor for viewing failed results from configure: -EXTEDITOR=@SEE@ - -##### Extract lemmas - add additional egrep pattern as parameters: ##### -##### --include "(pattern1|pattern2|...)" ##### -##### --exclude "(pattern1|pattern2|...)" ##### -@GTCORE@/scripts/extract-lemmas.sh \ - $source_file > $lemmas - -###### Start testing: ####### -transducer_found=0 -Fail=0 - -# The script tests both Xerox and Hfst transducers if available: -for f in $fsttype; do - if [ $f == "xfst" ]; then - lookup_tool="@LOOKUP@ -flags mbTT" - suffix="xfst" - # Does lookup support -q / quiet mode? - lookup_quiet=$($lookup_tool -q 2>&1 | grep USAGES) - if ! [[ $lookup_quiet == *"USAGES"* ]] ; then - # it does support quiet mode, add the -q flag: - lookup_tool="@LOOKUP@ -q -flags mbTT" - fi - elif [ $f == "hfst" ]; then - lookup_tool="@HFST_OPTIMIZED_LOOKUP@ -q" - suffix="hfstol" - else - Fail=1 - printf "ERROR: Unknown fst type! " - echo "$f - FAIL" - continue - fi - if [ -f "$generator_file.$suffix" ]; then - let "transducer_found += 1" - -###### Test non-comopunds: ####### - # generate verbs in infitive, extract the resulting generated lemma, - # store it: - sed 's/$/+V+Inf/' $lemmas | $lookup_tool $generator_file.$suffix \ - | fgrep -v "+?" 
| grep -v "^$" | cut -f2 | sort -u \ - > $generated_lemmas.$f.txt - -###### Collect results, and generate debug info if FAIL: ####### - # Sort and compare original input with resulting output - the diff is - # used to generate lemmas which are opened in SEE: - sort -u -o $generated_lemmas.$f.txt $generated_lemmas.$f.txt - comm -23 $lemmas $generated_lemmas.$f.txt > $result_file.$f.txt - - # Open the diff file in SubEthaEdit (if there is a diff): - if [ -s $result_file.$f.txt ]; then - grep -v '^$' $result_file.$f.txt \ - | sed 's/$/+V+Inf/' \ - | $lookup_tool $generator_file.$suffix \ - > $gen_result_file.$f.txt - # If we have an analyser, analyse the missing lemmas as well: - if test -e $analyser_file.$suffix ; then - grep -v '^$' $result_file.$f.txt \ - | $lookup_tool $analyser_file.$suffix \ - > $ana_result_file.$f.txt - fi - # Only open the failed lemmas in see if @SEE@ is defined: - if [ "$EXTEDITOR" ]; then - $EXTEDITOR $result_file.$f.txt - $EXTEDITOR $gen_result_file.$f.txt - $EXTEDITOR $ana_result_file.$f.txt - else - echo "There were problem lemmas. Details in:" - echo "* $result_file.$f.txt " - echo "* $gen_result_file.$f.txt" - echo "* $ana_result_file.$f.txt" - fi - Fail=1 - echo "$f - FAIL" - continue - fi - echo "$f - PASS" +source_file=@abs_srcdir@/../stems/${POS}.lexc +generator_file=@abs_builddir@/../../generator-gt-norm +analyser_file=@abs_builddir@/../../analyser-gt-norm +tags=+V+Inf +xtags= +inclusions= +exclusions= + +# call the generator script in giella core +for tk in $TOOLKITS ; do + "$genscript" "$POS" "$source_file" "$generator_file" "$analyser_file" \ + "$tk" "$tags" "$xtags" "$inclusions" "$exclusions" + rv=$? 
+ if test $rv = 77 ; then + echo skipped + exit 77 + elif test $rv -gt 0 ; then + echo failed + exit 1 fi done - -# At least one of the Xerox or HFST tests failed: -if [ "$Fail" = "1" ]; then - exit 1 -fi - -if [ $transducer_found -eq 0 ]; then - echo ERROR: No transducer found $analyser_file $generator_file - exit 77 -fi - -# When done, remove the generated data file: -rm -f $lemmas