Skip to content

Commit

Permalink
[Template merge] move some generate tests to core
Browse files Browse the repository at this point in the history
  • Loading branch information
flammie committed Jun 16, 2024
1 parent 15b61ee commit 52359ec
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 573 deletions.
2 changes: 1 addition & 1 deletion m4/giella-macros.m4
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ AC_MSG_RESULT([$GIELLA_CORE])
###############################################################
### This is the version of the Giella Core that we require. ###
### UPDATE AS NEEDED.
_giella_core_min_version=1.0.0
_giella_core_min_version=1.0.1
# GIELLA_CORE/GTCORE env. variable, required by the infrastructure to find scripts:
AC_ARG_VAR([GIELLA_CORE], [directory for the Giella infra core scripts and other required resources])
Expand Down
23 changes: 5 additions & 18 deletions src/fst/morphology/test/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,15 @@

SUBDIRS=phonology

TESTS_ENVIRONMENT = export GIELLA_CORE=$(GIELLA_CORE);

######## Test targets: ###########
######## Local test targets: ###########

# List here (space separated) all test scripts that should be run
# unconditionally:
TESTS=tag_test.sh
TESTS=

# Add your shell scripts for running tests requiring only a generator:
GENERATION_TESTS_IN=generate-adjective-lemmas.sh.in \
generate-noun-lemmas.sh.in \
generate-propernoun-lemmas.sh.in \
generate-verb-lemmas.sh.in

GENERATION_TESTS=generate-adjective-lemmas.sh \
generate-noun-lemmas.sh \
generate-propernoun-lemmas.sh \
generate-verb-lemmas.sh
GENERATION_TESTS_IN=
GENERATION_TESTS=

if WANT_GENERATION
TESTS+=$(GENERATION_TESTS)
Expand All @@ -34,10 +25,6 @@ XFAIL_TESTS=generate-adjective-lemmas.sh \
generate-propernoun-lemmas.sh \
generate-verb-lemmas.sh

# Make sure all test scripts are included in the distributed tarballs:
EXTRA_DIST=$(TESTS) $(GENERATION_TESTS_IN)

####### Other targets: ###########
clean-local:
-rm -f *.txt
include $(top_srcdir)/../giella-core/am-shared/devtest-include.am
include $(top_srcdir)/../giella-core/am-shared/src-fst-morphology-test-include.am
172 changes: 36 additions & 136 deletions src/fst/morphology/test/generate-adjective-lemmas.sh.in
Original file line number Diff line number Diff line change
@@ -1,152 +1,52 @@
#!/bin/bash
# @configure_input@

# Automake interprets the exit status as follows:
# - an exit status of 0 will denote a success
# - an exit status of 77 a skipped test
# - an exit status of 99 a hard error
# - any other exit status will denote a failure.

# To run this test script only, do:
#
# make check TESTS=generate-noun-lemmas.sh
# make check TESTS=generate-adjective-lemmas.sh

# Preflight: the script must be started by `make check`, or with srcdir set
# by hand (e.g. srcdir=.).
[ -n "$srcdir" ] || {
    echo needs to run from make check or set srcdir=.
    exit 1
}

# The lemma generation script is looked up under $GIELLA_CORE/scripts and
# has to be executable.
genscript=$GIELLA_CORE/scripts/generate-lemmas.sh
[ -x "$genscript" ] || {
    echo "missing generator script $genscript"
    exit 1
}

# Fall back to the hfst toolkit when TOOLKITS is unset or empty.
: "${TOOLKITS:=hfst}"

# This test script will test that all adjective lemmas generate as themselves.
# Extend as needed, and copy to new files to adapt to other parts of speech.
# The changes usually needed are:
#
# 1. change the reference to the source file (line 23)
# 2. extend the extract lemmas egrep expression (lines 53 ff)
# 3. adapt the tag addition and lemma generation instructions (lines 79 ff)
# 1. change the reference to the source file (line XX)
# 2. extend the extract lemmas egrep expression (lines YY)
# 3. adapt the tag addition and lemma generation instructions (lines ZZ)

###### Variables: #######
POS=adjectives
### in ###
source_file=${srcdir}/../stems/${POS}.lexc
generator_file=./../../generator-gt-norm
analyser_file=./../../analyser-gt-norm

### out ###
# Temporary files:
lemmas=./filtered-${POS}.txt
# Result files, will get filename suffix programmatically further down:
generated_lemmas=./generated-${POS}
result_file=missing_${POS}_lemmas
gen_result_file=generated_missing_${POS}_lemmas
ana_result_file=analysed_missing_${POS}_lemmas

# SKIP if source file does not exist (works with both single and
# multiple files):
if [ ! `ls -1 $source_file 2>/dev/null | wc -l ` -gt 0 ]; then
echo
echo "*** Warning: Source file(s) $source_file not found."
echo
exit 77
fi

# Use autotools mechanisms to only run the configured fst types in the tests:
fsttype=
@CAN_HFST_TRUE@fsttype="$fsttype hfst"
@CAN_XFST_TRUE@fsttype="$fsttype xfst"

# Exit if both hfst and xerox have been shut off:
if test -z "$fsttype" ; then
echo "All transducer types have been shut off at configure time."
echo "Nothing to test. Skipping."
exit 77
fi

# Get external Mac editor for viewing failed results from configure:
EXTEDITOR=@SEE@

##### Extract lemmas - add additional egrep pattern as parameters: #####
##### --include "(pattern1|pattern2|...)" #####
##### --exclude "(pattern1|pattern2|...)" #####
@GTCORE@/scripts/extract-lemmas.sh \
$source_file > $lemmas

###### Start testing: #######
transducer_found=0
Fail=0

# The script tests both Xerox and Hfst transducers if available:
for f in $fsttype; do
if [ $f == "xfst" ]; then
lookup_tool="@LOOKUP@ -flags mbTT"
suffix="xfst"
# Does lookup support -q / quiet mode?
lookup_quiet=$($lookup_tool -q 2>&1 | grep USAGES)
if ! [[ $lookup_quiet == *"USAGES"* ]] ; then
# it does support quiet mode, add the -q flag:
lookup_tool="@LOOKUP@ -q -flags mbTT"
fi
elif [ $f == "hfst" ]; then
lookup_tool="@HFST_OPTIMIZED_LOOKUP@ -q"
suffix="hfstol"
else
Fail=1
printf "ERROR: Unknown fst type! "
echo "$f - FAIL"
continue
fi
if [ -f "$generator_file.$suffix" ]; then
let "transducer_found += 1"

###### Test lemma generation: #######
# generate lemmas in singular, extract the resulting generated lemma,
# store it:
sed 's/$/+A+Sg+Nom/' $lemmas | $lookup_tool $generator_file.$suffix \
| fgrep -v "+?" | grep -v "^$" | cut -f2 | sort -u \
> $generated_lemmas.$f.txt

# Add more variants as needed, e.g. comparative, superlative only adjs.

###### Collect results, and generate debug info if FAIL: #######
# Sort and compare original input with resulting output - the diff is
# used to generate lemmas which are opened in SEE:
sort -u -o $generated_lemmas.$f.txt $generated_lemmas.$f.txt
comm -23 $lemmas $generated_lemmas.$f.txt > $result_file.$f.txt

# Open the diff file in SubEthaEdit (if there is a diff):
if [ -s $result_file.$f.txt ]; then
grep -v '^$' $result_file.$f.txt \
| sed 's/$/+A+Sg+Nom/' \
| $lookup_tool $generator_file.$suffix \
> $gen_result_file.$f.txt
# If we have an analyser, analyse the missing lemmas as well:
if test -e $analyser_file.$suffix ; then
grep -v '^$' $result_file.$f.txt \
| $lookup_tool $analyser_file.$suffix \
> $ana_result_file.$f.txt
fi
# Only open the failed lemmas in see if @SEE@ is defined:
if [ "$EXTEDITOR" ]; then
$EXTEDITOR $result_file.$f.txt
$EXTEDITOR $gen_result_file.$f.txt
$EXTEDITOR $ana_result_file.$f.txt
else
echo "There were problem lemmas. Details in:"
echo "* $result_file.$f.txt "
echo "* $gen_result_file.$f.txt"
echo "* $ana_result_file.$f.txt"
fi
Fail=1
echo "$f - FAIL"
continue
fi
echo "$f - PASS"
# Parameters handed to $genscript further down. The @abs_srcdir@ and
# @abs_builddir@ placeholders are filled in when this .in template is
# processed by configure.

# Lexc source file with the stems for this part of speech:
source_file=@abs_srcdir@/../stems/${POS}.lexc
# Paths to the generator and analyser transducers, given without a file
# suffix here.
# NOTE(review): presumably $genscript appends the toolkit-specific suffix --
# confirm in generate-lemmas.sh.
generator_file=@abs_builddir@/../../generator-gt-norm
analyser_file=@abs_builddir@/../../analyser-gt-norm
# Tag string passed to $genscript as its sixth argument:
tags="+A+Sg+Nom"
# Left empty in this test; passed to $genscript as arguments seven to nine.
# NOTE(review): their exact meaning is defined by generate-lemmas.sh --
# confirm there before filling them in.
xtags=
inclusions=
exclusions=

# Run the generation script once per requested toolkit and stop at the first
# toolkit that does not succeed.
for tk in $TOOLKITS ; do
    "$genscript" "$POS" "$source_file" "$generator_file" "$analyser_file" \
        "$tk" "$tags" "$xtags" "$inclusions" "$exclusions"
    rc=$?
    case $rc in
        0)
            # Success: carry on with the next toolkit.
            ;;
        77)
            # 77 is handed through unchanged (Automake: skipped test).
            echo skipped
            exit 77
            ;;
        *)
            # Every other non-zero status is reported as a plain failure.
            echo failed
            exit 1
            ;;
    esac
done

# At least one of the Xerox or HFST tests failed:
if [ "$Fail" = "1" ]; then
exit 1
fi

if [ $transducer_found -eq 0 ]; then
echo ERROR: No transducer found $analyser_file $generator_file
exit 77
fi

# When done, remove the generated data file:
rm -f $lemmas
Loading

0 comments on commit 52359ec

Please sign in to comment.