diff --git a/.gitignore b/.gitignore index b2355070..b3d95092 100644 --- a/.gitignore +++ b/.gitignore @@ -61,19 +61,19 @@ /src/cg3/functions.cg3 /src/cg3/generated-tag-list.cg3 /src/cg3/valency-postspell.cg3 -/src/filters/*-tags.txt -/src/filters/*area-*.regex -/src/filters/remove-all*.regex -/src/filters/remove-homonymy-tags.regex -/src/filters/remove-usage-tags.regex -/src/fst/*-error-log.txt -/src/fst/*.tmp.* +/src/fst/filters/*-tags.txt +/src/fst/filters/*area-*.regex +/src/fst/filters/remove-all*.regex +/src/fst/filters/remove-homonymy-tags.regex +/src/fst/filters/remove-usage-tags.regex +/src/fst/morphology/*-error-log.txt +/src/fst/morphology/*.tmp.* /src/fst/generated_files/*.lexc -/src/fst/lexicon* -/src/fst/url.lexc -/src/orthography/*-nfc2nfd.* -/src/orthography/*-nfd2nfc.* -/src/phonetics/tests/*.sh +/src/fst/morphology/lexicon* +/src/fst/morphology/url.lexc +/src/fst/orthography/*-nfc2nfd.* +/src/fst/orthography/*-nfd2nfc.* +/src/fst/phonetics/tests/*.sh /test/run-morph-tester.sh /test/run-yaml-testcases.sh /test/src/morphology/all*.txt @@ -148,3 +148,5 @@ Makefile.in build bygg generated* +.deps +.generated diff --git a/m4/giella-config-files.m4 b/m4/giella-config-files.m4 index 82f2d350..d7557b5f 100644 --- a/m4/giella-config-files.m4 +++ b/m4/giella-config-files.m4 @@ -8,15 +8,16 @@ AC_CONFIG_FILES([Makefile \ giella-mdf.pc \ manifest.toml \ src/Makefile \ - src/filters/Makefile \ - src/hyphenation/Makefile \ + src/fst/filters/Makefile \ + src/fst/syllabification/Makefile \ src/fst/Makefile \ - src/orthography/Makefile \ - src/phonetics/Makefile \ - src/phonetics/tests/Makefile \ + src/fst/morphology/Makefile \ + src/fst/orthography/Makefile \ + src/fst/phonetics/Makefile \ + src/fst/phonetics/tests/Makefile \ src/cg3/Makefile \ - src/tagsets/Makefile \ - src/transcriptions/Makefile \ + src/fst/tagsets/Makefile \ + src/fst/transcriptions/Makefile \ docs/Makefile \ test/Makefile \ test/tools/Makefile \ @@ -64,8 +65,8 @@ AC_CONFIG_FILES([Makefile \ # 
Add one AC_CONFIG_FILES for each script file that needs processing. This gives # the most pleasant user experience and most readable autoconf code to maintain. # Spell checker tests, all languages: -AC_CONFIG_FILES([src/phonetics/tests/run_tests.sh], - [chmod a+x src/phonetics/tests/run_tests.sh]) +AC_CONFIG_FILES([src/fst/phonetics/tests/run_tests.sh], + [chmod a+x src/fst/phonetics/tests/run_tests.sh]) AC_CONFIG_FILES([test/tools/spellcheckers/test-zhfst-file.sh], \ [chmod a+x test/tools/spellcheckers/test-zhfst-file.sh]) AC_CONFIG_FILES([test/tools/spellcheckers/fstbased/desktop/hfst/test-zhfst-basic-sugg-speed.sh], \ diff --git a/m4/giella-macros.m4 b/m4/giella-macros.m4 index 2d7500c5..99092492 100644 --- a/m4/giella-macros.m4 +++ b/m4/giella-macros.m4 @@ -88,7 +88,7 @@ AC_MSG_RESULT([$GIELLA_CORE]) ############################################################### ### This is the version of the Giella Core that we require. ### ### UPDATE AS NEEDED. -_giella_core_min_version=0.20.1 +_giella_core_min_version=0.21.0 # GIELLA_CORE/GTCORE env. 
variable, required by the infrastructure to find scripts: AC_ARG_VAR([GIELLA_CORE], [directory for the Giella infra core scripts and other required resources]) @@ -845,9 +845,9 @@ AC_ARG_ENABLE([abbr], [enable_abbr=$enableval], [enable_abbr=no]) AS_IF([test x$enable_abbr != xno -a \ - "$(find ${srcdir}/src/fst/stems/ -name "abbreviations.lexc" | head -n 1)" = "" ], + "$(find ${srcdir}/src/fst/morphology/stems/ -name "abbreviations.lexc" | head -n 1)" = "" ], [AC_MSG_ERROR([You asked for abbr.txt generation, but have no file \ -src/fst/stems/abbreviations.lexc])]) +src/fst/morphology/stems/abbreviations.lexc])]) AS_IF([test x$enable_abbr = xyes -a x$enable_generators = xno], [AC_MSG_ERROR([You need to enable generators to build the abbr file])]) AM_CONDITIONAL([WANT_ABBR], [test "x$enable_abbr" != xno]) @@ -1002,7 +1002,7 @@ To build, test and install: make install EOF AS_IF([test x$gt_prog_xslt = xno -a \ - "$(find ${srcdir}/src/fst/stems -name "*.xml" | head -n 1)" != "" ], + "$(find ${srcdir}/src/fst/morphology/stems -name "*.xml" | head -n 1)" != "" ], [AC_MSG_WARN([You have XML source files, but XML transformation to LexC is disabled. Please check the output of configure to locate any problems. The LexC files will still compile though. @@ -1048,5 +1048,7 @@ cd .. git clone git@github.com:giellalt/$gt_SHARED_FAILS cd $gt_SHARED_FAILS ./autogen.sh && ./configure && make])]) +AC_MSG_WARN([January 2024: the lexc files and fsts have been moved up to src/fst/morphology]) ]) # gt_PRINT_FOOTER + # vim: set ft=config: diff --git a/src/Makefile.am b/src/Makefile.am index 24831ee2..0ba25914 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,79 +2,7 @@ ## Copyright: Sámediggi/Divvun/UiT ## Licence: GPL v3+ -# always build . last here, and tagsets have to be built after morphology -SUBDIRS = fst filters phonetics hyphenation orthography cg3 transcriptions tagsets . 
- -####### Automake targets: ######## - -# Define target variables first, before assigning to them: -GT_ANALYSERS= -GT_GENERATORS= -CUSTOM_FSTS= - -#### Local modifications in *fst processing: #### -#### -#### Copy the fallback targets, and rename them to the desired targets. Then: -#### Replace the 'cp' command (Xerox) / Prepend the hfst-invert command (Hfst - -#### remember to move the $<) with whatever you need to complete -#### the processing to get the final target transducer. -#### Remember to add the dependencies as well. -#### Also make sure that HFST and Xerox processing are the same. -#### -#### If you add new transducers to be built, you need to add them to the -#### relevant variable, e.g.: -#### -#### if CAN_HFST -#### GT_GENERATORS+=generator-oahpa-gt-norm.hfst -#### endif -#### -#### NB!!!! The HFST targets should get a hyphen after 'analyser'/'generator' -#### respectively, to make the local targets minimally different from and -#### slightly more specific than the fallback targets. This is to avoid warnings -#### about duplicate targets. 
That is, the local targets should looke like: -#### -#### analyser-%.hfst: analyser-%.tmp.hfst -#### generator-%.hfst: generator-%.tmp.hfst - -################################################################## -#### BEGIN: Add local processing instructions BELOW this line #### -################################################################## - -######################################################## -#### Add language-specific transducer targets here: #### - -#### Xerox transducers: -if CAN_XFST -GT_ANALYSERS+= -GT_GENERATORS+= - -if WANT_CUSTOM_FSTS -CUSTOM_FSTS+= -endif # WANT_CUSTOM_FSTS - -endif # CAN_XFST - -#### HFST transducers -if CAN_HFST -GT_ANALYSERS+= -GT_GENERATORS+= - -if WANT_CUSTOM_FSTS -CUSTOM_FSTS+= -endif # WANT_CUSTOM_FSTS - -endif # CAN_HFST - -#### FOMA transducers -if CAN_FOMA -GT_ANALYSERS+= -GT_GENERATORS+= - -if WANT_CUSTOM_FSTS -CUSTOM_FSTS+= -endif # WANT_CUSTOM_FSTS - -endif # CAN_FOMA +SUBDIRS = fst cg3 ################################################# #### Add language-specific build rules here: #### diff --git a/src/fst/Makefile.am b/src/fst/Makefile.am index 7cb49f78..3f1f9fc7 100644 --- a/src/fst/Makefile.am +++ b/src/fst/Makefile.am @@ -1,191 +1,86 @@ ## Process this file with automake to produce Makefile.in - -## Copyright (C) 2011 Samediggi - -## This program is free software: you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation, either version 3 of the License, or -## (at your option) any later version. - -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. - -## You should have received a copy of the GNU General Public License -## along with this program. If not, see <http://www.gnu.org/licenses/>. 
- -# Add language-specific flags for hfst-lexc compilation here: -if HAVE_SHARED_COMMON -HFST_LEXC_LOCAL_FLAGS= --Werror # uncomment if lexc is good enough -else -HFST_LEXC_LOCAL_FLAGS= # No --Werror if deps are missing ! -endif - - -####### Morphology source file defs: ######## - -# Set this to name of lexc file containing Multichar_Symbols and LEXICON Root -GT_LEXC_ROOT=$(srcdir)/root.lexc - -# Set this to the names of all regular lexc source files: -GT_LEXC_SRCS_L1_L2=\ - stems/abbreviations.lexc \ - stems/acronyms.lexc \ - stems/exceptions.lexc \ - stems/adjectives_newwords.lexc \ - stems/adjectives-russian-like_newwords.lexc \ - stems/nouns_newwords.lexc \ - stems/nouns-russian-homographs_newwords.lexc \ - stems/propernouns_newwords.lexc \ - stems/verbs_newwords.lexc \ - compounding.lexc \ - affixes/adjectives.lexc \ - affixes/adpositions.lexc \ - affixes/adverbs.lexc \ - affixes/clitics.lexc \ - affixes/conjunctors.lexc \ - affixes/interjections.lexc \ - affixes/nouns.lexc \ - affixes/numbers.lexc \ - affixes/particles.lexc \ - affixes/pronouns.lexc \ - affixes/propernouns.lexc \ - affixes/symbols.lexc \ - affixes/verbs.lexc - -# If you are building an error-detecting L2 analyser, specify the lexc files -# that differ between the regular L1 and the L2 analysers below, in L1 and -# L2 respectively. L2 files must end in "*-L2.lexc". See SME for an example. 
-L1= - -L2= - -GT_LEXC_SRCS=\ - $(GT_LEXC_SRCS_L1_L2) \ - $(L1) - -GT_LEXC_L2_SRCS=\ - $(GT_LEXC_SRCS_L1_L2) \ - $(L2) - -# Set this to the names of all generated lexc files, if any -GENERATED_LEXC_SRCS=\ - generated_files/urj-Cyrl-$(GLANG)-propernouns.lexc \ - generated_files/smi-$(GLANG)-digits.lexc \ - generated_files/smi-$(GLANG)-arabic_roman_digits.lexc \ - generated_files/mul-$(GLANG)-symbols.lexc \ - generated_files/mul-$(GLANG)-punctuation.lexc \ - $(srcdir)/stems/A_mdf2x.lexc \ - $(srcdir)/stems/Adp_mdf2x.lexc \ - $(srcdir)/stems/Adv_mdf2x.lexc \ - $(srcdir)/stems/CC_mdf2x.lexc \ - $(srcdir)/stems/Descr_mdf2x.lexc \ - $(srcdir)/stems/Ger_mdf2x.lexc \ - $(srcdir)/stems/Interj_mdf2x.lexc \ - $(srcdir)/stems/N_mdf2x.lexc \ - $(srcdir)/stems/N_Prop_mdf2x.lexc \ - $(srcdir)/stems/Num_mdf2x.lexc \ - $(srcdir)/stems/Pcle_mdf2x.lexc \ - $(srcdir)/stems/Prc_mdf2x.lexc \ - $(srcdir)/stems/V_mdf2x.lexc - - -# change handling of shared lexical data here: -if HAVE_SHARED_COMMON -url.tmp.lexc: $(gt_SHARED_common)/src/fst/url.lexc - $(AM_V_CP)cp -f $< $@ - -generated_files/mul-$(GLANG)-%.lexc: $(gt_SHARED_common)/src/fst/stems/%.lexc - $(AM_V_at)$(MKDIR_P) generated_files - $(AM_V_CP)cp -f $< $@ -else -# this is "safe" fallback (compiles but you miss everything) -url.tmp.lexc: - echo "LEXICON Root" > $@ - echo "< h t t p (s) %: %/ %/ ?*> # ;" >> $@ - -generated_files/mul-$(GLANG)-%.lexc: - $(AM_V_at)$(MKDIR_P) generated_files - echo "! 
Missing shared common data" > $@ -endif -# add other lexical shared data handling here - - -# $(srcdir)/stems/Pron_mdf2x.lexc -# Set this to the names of all source xml files, if any -GT_XML_SRCS=\ - stems/A_mdf2x.xml \ - stems/Adp_mdf2x.xml \ - stems/Adv_mdf2x.xml \ - stems/CC_mdf2x.xml \ - stems/Descr_mdf2x.xml \ - stems/Ger_mdf2x.xml \ - stems/Interj_mdf2x.xml \ - stems/N_mdf2x.xml \ - stems/N_Prop_mdf2x.xml \ - stems/Num_mdf2x.xml \ - stems/Pcle_mdf2x.xml \ - stems/Prc_mdf2x.xml \ - stems/V_mdf2x.xml - -# stems/Pron_mdf2x.xml -# Define any additional lexc sources here (compiled on their own): -GT_LOCAL_SRCS= - -# Define local xfscripts here: -GT_LOCAL_XFSCRIPT_SRCS= - -# Define here any additional sources just included in the distro: -GT_DISTRO_SRCS= - -### BEGIN: Local processing: ### - -### List additional targets in the following variable, for build targets not -### covered by other means. This comes ***in addition to*** what you can do by -### just targeting lexicon.*: lexicon.tmp.*, and is useful if you want to build -### separate fst's that need further treatment in the src/ dir. See the language -### gle/ for an example of how this is used. - -GIELLA_LOCAL_TARGETS= - -if HAVE_SHARED_URJ_CYRL -generated_files/urj-Cyrl-$(GLANG)-propernouns.lexc: \ - $(gt_SHARED_urj_Cyrl)/src/fst/stems/urj-Cyrl-propernouns.lexc - $(AM_V_at)$(MKDIR_P) generated_files - $(AM_V_GEN)cp -f $< $@ -else -generated_files/urj-Cyrl-$(GLANG)-%.lexc: - $(AM_V_at)$(MKDIR_P) generated_files - $(AM_C_GEN)echo "! Missing shared common data" > $@ -endif - -if HAVE_SHARED_SMI -generated_files/smi-$(GLANG)-%.lexc: \ - $(gt_SHARED_smi)/src/fst/stems/%.lexc - $(AM_V_at)$(MKDIR_P) generated_files - $(AM_V_GEN)cp -f $< $@ - -generated_files/smi-$(GLANG)-%.lexc: \ - $(gt_SHARED_smi)/src/fst/stems/smi-%.lexc - $(AM_V_at)$(MKDIR_P) generated_files - $(AM_V_GEN)cp -f $< $@ -else -generated_files/smi-$(GLANG)-%.lexc: - $(AM_V_at)$(MKDIR_P) generated_files - $(AM_C_GEN)echo "! 
Missing shared common data" > $@ -endif -### END: Local processing: ### - -####### Other targets: ########### -# Clean: add local clean targets on separate lines, so that the first line can -# easily get updates from the template dir through svn merge. -clean-local: - -rm -f *.all.* *fst *.foma *.script generated_files/*.lexc lexicon.* - -rm -f url.lexc *.tmp* - -rm -f *.relabel lexicon-tags.* lexicon-sigma.* - -include $(srcdir)/Makefile.modifications-phon.am -include $(top_srcdir)/../giella-core/am-shared/src-morphology-dir-include.am - -# vim: set ft=automake: +## Copyright: Sámediggi/Divvun/UiT +## Licence: GPL v3+ + +# always build . last here, and tagsets have to be built after morphology +SUBDIRS = morphology filters phonetics syllabification orthography transcriptions tagsets . + +####### Automake targets: ######## + +# Define target variables first, before assigning to them: +GT_ANALYSERS= +GT_GENERATORS= +CUSTOM_FSTS= + +#### Local modifications in *fst processing: #### +#### +#### Copy the fallback targets, and rename them to the desired targets. Then: +#### Replace the 'cp' command (Xerox) / Prepend the hfst-invert command (Hfst - +#### remember to move the $<) with whatever you need to complete +#### the processing to get the final target transducer. +#### Remember to add the dependencies as well. +#### Also make sure that HFST and Xerox processing are the same. +#### +#### If you add new transducers to be built, you need to add them to the +#### relevant variable, e.g.: +#### +#### if CAN_HFST +#### GT_GENERATORS+=generator-oahpa-gt-norm.hfst +#### endif +#### +#### NB!!!! The HFST targets should get a hyphen after 'analyser'/'generator' +#### respectively, to make the local targets minimally different from and +#### slightly more specific than the fallback targets. This is to avoid warnings +#### about duplicate targets. 
That is, the local targets should look like: +#### +#### analyser-%.hfst: analyser-%.tmp.hfst +#### generator-%.hfst: generator-%.tmp.hfst + +################################################################## +#### BEGIN: Add local processing instructions BELOW this line #### +################################################################## + +######################################################## +#### Add language-specific transducer targets here: #### + +#### Xerox transducers: +if CAN_XFST +GT_ANALYSERS+= +GT_GENERATORS+= + +if WANT_CUSTOM_FSTS +CUSTOM_FSTS+= +endif # WANT_CUSTOM_FSTS + +endif # CAN_XFST + +#### HFST transducers +if CAN_HFST +GT_ANALYSERS+= +GT_GENERATORS+= + +if WANT_CUSTOM_FSTS +CUSTOM_FSTS+= +endif # WANT_CUSTOM_FSTS + +endif # CAN_HFST + +#### FOMA transducers +if CAN_FOMA +GT_ANALYSERS+= +GT_GENERATORS+= + +if WANT_CUSTOM_FSTS +CUSTOM_FSTS+= +endif # WANT_CUSTOM_FSTS + +endif # CAN_FOMA + +################################################# +#### Add language-specific build rules here: #### + +################################################################## +#### END: Add local processing instructions ABOVE this line ###### +################################################################## + +include $(top_srcdir)/../giella-core/am-shared/src-fst-dir-include.am diff --git a/src/filters/.gitignore b/src/fst/filters/.gitignore similarity index 100% rename from src/filters/.gitignore rename to src/fst/filters/.gitignore diff --git a/src/filters/Makefile.am b/src/fst/filters/Makefile.am similarity index 69% rename from src/filters/Makefile.am rename to src/fst/filters/Makefile.am index 5c63a617..4724b67d 100644 --- a/src/filters/Makefile.am +++ b/src/fst/filters/Makefile.am @@ -20,22 +20,11 @@ # List any local filter regex files here: GIELLA_FILTER_LOCAL_REGEX_SRCS=\ - downcase_UCletters.regex \ - remove-norm-comp-tags.regex \ - remove-rel_focus-strings.regex \ - remove-hard-sign-in-first-syllable.regex \ - 
block-illegal_compound-strings.regex \ - insert-default-compounding-tags.regex \ - insert-default_left_compounding-tags.regex \ - remove-derivation-position-tags.regex \ - remove-illegal-derivation-strings.regex \ - remove-illegal-derivation-strings-flagbased.regex \ - rename-POS_before_Der-tags.regex \ - split-CmpN-tags.regex \ - split-CmpNP-tags.regex \ + downcase_UCletters.regex\ + remove-derivation-position-tags.regex \ remove-DNorm-tags.regex \ - convert_to_flags-CmpNP-tags.regex \ - change-optionally-word_boundary_to_hyphen.regex + remove-norm-comp-tags.regex \ + rename-POS_before_Der-tags.regex # List any local filter xfscript files here: GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS= diff --git a/src/filters/block-illegal_compound-strings.regex b/src/fst/filters/block-illegal_compound-strings.regex similarity index 100% rename from src/filters/block-illegal_compound-strings.regex rename to src/fst/filters/block-illegal_compound-strings.regex diff --git a/src/filters/change-optionally-word_boundary_to_hyphen.regex b/src/fst/filters/change-optionally-word_boundary_to_hyphen.regex similarity index 100% rename from src/filters/change-optionally-word_boundary_to_hyphen.regex rename to src/fst/filters/change-optionally-word_boundary_to_hyphen.regex diff --git a/src/filters/convert_to_flags-CmpNP-tags.regex b/src/fst/filters/convert_to_flags-CmpNP-tags.regex similarity index 100% rename from src/filters/convert_to_flags-CmpNP-tags.regex rename to src/fst/filters/convert_to_flags-CmpNP-tags.regex diff --git a/src/filters/downcase_UCletters.regex b/src/fst/filters/downcase_UCletters.regex similarity index 100% rename from src/filters/downcase_UCletters.regex rename to src/fst/filters/downcase_UCletters.regex diff --git a/src/filters/insert-default-compounding-tags.regex b/src/fst/filters/insert-default-compounding-tags.regex similarity index 100% rename from src/filters/insert-default-compounding-tags.regex rename to src/fst/filters/insert-default-compounding-tags.regex diff 
--git a/src/filters/insert-default_left_compounding-tags.regex b/src/fst/filters/insert-default_left_compounding-tags.regex similarity index 100% rename from src/filters/insert-default_left_compounding-tags.regex rename to src/fst/filters/insert-default_left_compounding-tags.regex diff --git a/src/filters/remove-DNorm-tags.regex b/src/fst/filters/remove-DNorm-tags.regex similarity index 100% rename from src/filters/remove-DNorm-tags.regex rename to src/fst/filters/remove-DNorm-tags.regex diff --git a/src/filters/remove-derivation-position-tags.regex b/src/fst/filters/remove-derivation-position-tags.regex similarity index 100% rename from src/filters/remove-derivation-position-tags.regex rename to src/fst/filters/remove-derivation-position-tags.regex diff --git a/src/filters/remove-hard-sign-in-first-syllable.regex b/src/fst/filters/remove-hard-sign-in-first-syllable.regex similarity index 100% rename from src/filters/remove-hard-sign-in-first-syllable.regex rename to src/fst/filters/remove-hard-sign-in-first-syllable.regex diff --git a/src/filters/remove-illegal-derivation-strings-flagbased.regex b/src/fst/filters/remove-illegal-derivation-strings-flagbased.regex similarity index 100% rename from src/filters/remove-illegal-derivation-strings-flagbased.regex rename to src/fst/filters/remove-illegal-derivation-strings-flagbased.regex diff --git a/src/filters/remove-illegal-derivation-strings.regex b/src/fst/filters/remove-illegal-derivation-strings.regex similarity index 100% rename from src/filters/remove-illegal-derivation-strings.regex rename to src/fst/filters/remove-illegal-derivation-strings.regex diff --git a/src/filters/remove-norm-comp-tags.regex b/src/fst/filters/remove-norm-comp-tags.regex similarity index 100% rename from src/filters/remove-norm-comp-tags.regex rename to src/fst/filters/remove-norm-comp-tags.regex diff --git a/src/filters/remove-rel_focus-strings.regex b/src/fst/filters/remove-rel_focus-strings.regex similarity index 100% rename from 
src/filters/remove-rel_focus-strings.regex rename to src/fst/filters/remove-rel_focus-strings.regex diff --git a/src/filters/rename-POS_before_Der-tags.regex b/src/fst/filters/rename-POS_before_Der-tags.regex similarity index 100% rename from src/filters/rename-POS_before_Der-tags.regex rename to src/fst/filters/rename-POS_before_Der-tags.regex diff --git a/src/filters/split-CmpN-tags.regex b/src/fst/filters/split-CmpN-tags.regex similarity index 100% rename from src/filters/split-CmpN-tags.regex rename to src/fst/filters/split-CmpN-tags.regex diff --git a/src/filters/split-CmpNP-tags.regex b/src/fst/filters/split-CmpNP-tags.regex similarity index 100% rename from src/filters/split-CmpNP-tags.regex rename to src/fst/filters/split-CmpNP-tags.regex diff --git a/src/fst/morphology/Makefile.am b/src/fst/morphology/Makefile.am new file mode 100644 index 00000000..abe2b795 --- /dev/null +++ b/src/fst/morphology/Makefile.am @@ -0,0 +1,159 @@ +## Process this file with automake to produce Makefile.in + +## Copyright (C) 2011 Samediggi + +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation, either version 3 of the License, or +## (at your option) any later version. + +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +## You should have received a copy of the GNU General Public License +## along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Add language-specific flags for hfst-lexc compilation here: +if HAVE_SHARED_COMMON +HFST_LEXC_LOCAL_FLAGS= # --Werror # uncomment if lexc is good enough +else +HFST_LEXC_LOCAL_FLAGS= # No --Werror if deps are missing ! 
+endif + + +####### Morphology source file defs: ######## + +# Set this to name of lexc file containing Multichar_Symbols and LEXICON Root +GT_LEXC_ROOT=$(srcdir)/root.lexc + +# Set this to the names of all regular lexc source files: +GT_LEXC_SRCS_L1_L2=\ + stems/abbreviations.lexc \ + stems/acronyms.lexc \ + stems/exceptions.lexc \ + stems/adjectives_newwords.lexc \ + stems/adjectives-russian-like_newwords.lexc \ + stems/nouns_newwords.lexc \ + stems/nouns-russian-homographs_newwords.lexc \ + stems/propernouns_newwords.lexc \ + stems/verbs_newwords.lexc \ + compounding.lexc \ + affixes/adjectives.lexc \ + affixes/adpositions.lexc \ + affixes/adverbs.lexc \ + affixes/clitics.lexc \ + affixes/conjunctors.lexc \ + affixes/interjections.lexc \ + affixes/nouns.lexc \ + affixes/numbers.lexc \ + affixes/particles.lexc \ + affixes/pronouns.lexc \ + affixes/propernouns.lexc \ + affixes/symbols.lexc \ + affixes/verbs.lexc + +# If you are building an error-detecting L2 analyser, specify the lexc files +# that differ between the regular L1 and the L2 analysers below, in L1 and +# L2 respectively. L2 files must end in "*-L2.lexc". See SME for an example. 
+L1= + +L2= + +GT_LEXC_SRCS=\ + $(GT_LEXC_SRCS_L1_L2) \ + $(L1) + +GT_LEXC_L2_SRCS=\ + $(GT_LEXC_SRCS_L1_L2) \ + $(L2) + +# Set this to the names of all generated lexc files, if any +GENERATED_LEXC_SRCS=\ + generated_files/urj-Cyrl-$(GLANG)-propernouns.lexc \ + generated_files/smi-$(GLANG)-digits.lexc \ + generated_files/smi-$(GLANG)-arabic_roman_digits.lexc \ + generated_files/mul-$(GLANG)-symbols.lexc \ + generated_files/mul-$(GLANG)-punctuation.lexc \ + $(srcdir)/stems/A_mdf2x.lexc \ + $(srcdir)/stems/Adp_mdf2x.lexc \ + $(srcdir)/stems/Adv_mdf2x.lexc \ + $(srcdir)/stems/CC_mdf2x.lexc \ + $(srcdir)/stems/Descr_mdf2x.lexc \ + $(srcdir)/stems/Ger_mdf2x.lexc \ + $(srcdir)/stems/Interj_mdf2x.lexc \ + $(srcdir)/stems/N_mdf2x.lexc \ + $(srcdir)/stems/N_Prop_mdf2x.lexc \ + $(srcdir)/stems/Num_mdf2x.lexc \ + $(srcdir)/stems/Pcle_mdf2x.lexc \ + $(srcdir)/stems/Prc_mdf2x.lexc \ + $(srcdir)/stems/V_mdf2x.lexc + +# change handling of shared lexical data here: +if HAVE_SHARED_COMMON +.generated/url.tmp.lexc: $(gt_SHARED_common)/src/fst/url.lexc + $(MAKE) $(GENDIR) + $(AM_V_CP)cp -f $< $@ + +generated_files/mul-$(GLANG)-%.lexc: $(gt_SHARED_common)/src/fst/stems/%.lexc + $(MAKE) $(GENDIR) + $(AM_V_CP)cp -f $< $@ +else +# this is "safe" fallback (compiles but you miss everything) +.generated/url.tmp.lexc: + echo "LEXICON Root" > $@ + echo "< h t t p (s) %: %/ %/ ?*> # ;" >> $@ + +generated_files/mul-$(GLANG)-%.lexc: + $(MAKE) $(GENDIR) + echo "! 
Missing shared common data" > $@ +endif +# add other lexical shared data handling here + +# Set this to the names of all source xml files, if any +GT_XML_SRCS=\ + stems/A_mdf2x.xml \ + stems/Adp_mdf2x.xml \ + stems/Adv_mdf2x.xml \ + stems/CC_mdf2x.xml \ + stems/Descr_mdf2x.xml \ + stems/Ger_mdf2x.xml \ + stems/Interj_mdf2x.xml \ + stems/N_mdf2x.xml \ + stems/N_Prop_mdf2x.xml \ + stems/Num_mdf2x.xml \ + stems/Pcle_mdf2x.xml \ + stems/Prc_mdf2x.xml \ + stems/V_mdf2x.xml + +# Define any additional lexc sources here (compiled on their own): +GT_LOCAL_SRCS= + +# Define local xfscripts here: +GT_LOCAL_XFSCRIPT_SRCS= + +# Define here any additional sources just included in the distro: +GT_DISTRO_SRCS= + +### BEGIN: Local processing: ### + +### List additional targets in the following variable, for build targets not +### covered by other means. This comes ***in addition to*** what you can do by +### just targeting lexicon.*: lexicon.tmp.*, and is useful if you want to build +### separate fst's that need further treatment in the src/ dir. See the language +### gle/ for an example of how this is used. + +GIELLA_LOCAL_TARGETS= + +### END: Local processing: ### + +####### Other targets: ########### +# Clean: add local clean targets on separate lines, so that the first line can +# easily get updates from the template dir through svn merge. 
+ +include $(srcdir)/Makefile.modifications-local.am +include $(srcdir)/Makefile.modifications-phon.am +include $(top_srcdir)/../giella-core/am-shared/src-morphology-dir-include.am + +# vim: set ft=automake: diff --git a/src/fst/morphology/Makefile.modifications-local.am b/src/fst/morphology/Makefile.modifications-local.am new file mode 100644 index 00000000..eee99b97 --- /dev/null +++ b/src/fst/morphology/Makefile.modifications-local.am @@ -0,0 +1,45 @@ +## Process this file with automake to produce Makefile.in + +## Copyright (C) 2011 Samediggi + +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation, either version 3 of the License, or +## (at your option) any later version. + +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +## You should have received a copy of the GNU General Public License +## along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Add local build rules and shared here... + +if HAVE_SHARED_URJ_CYRL +generated_files/urj-Cyrl-$(GLANG)-propernouns.lexc: \ + $(gt_SHARED_urj_Cyrl)/src/fst/stems/urj-Cyrl-propernouns.lexc + $(AM_V_at)$(MKDIR_P) generated_files + $(AM_V_GEN)cp -f $< $@ +else +generated_files/urj-Cyrl-$(GLANG)-%.lexc: + $(AM_V_at)$(MKDIR_P) generated_files + $(AM_V_GEN)echo "! 
Missing shared common data" > $@ +endif + +if HAVE_SHARED_SMI +generated_files/smi-$(GLANG)-%.lexc: \ + $(gt_SHARED_smi)/src/fst/stems/%.lexc + $(AM_V_at)$(MKDIR_P) generated_files + $(AM_V_GEN)cp -f $< $@ + +generated_files/smi-$(GLANG)-%.lexc: \ + $(gt_SHARED_smi)/src/fst/stems/smi-%.lexc + $(AM_V_at)$(MKDIR_P) generated_files + $(AM_V_GEN)cp -f $< $@ +else +generated_files/smi-$(GLANG)-%.lexc: + $(AM_V_at)$(MKDIR_P) generated_files + $(AM_V_GEN)echo "! Missing shared common data" > $@ +endif diff --git a/src/fst/morphology/Makefile.modifications-phon.am b/src/fst/morphology/Makefile.modifications-phon.am new file mode 100644 index 00000000..8c872aee --- /dev/null +++ b/src/fst/morphology/Makefile.modifications-phon.am @@ -0,0 +1,26 @@ +## Process this file with automake to produce Makefile.in + +## Copyright (C) 2011 Samediggi + +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation, either version 3 of the License, or +## (at your option) any later version. + +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +## You should have received a copy of the GNU General Public License +## along with this program. If not, see <http://www.gnu.org/licenses/>. + +####### Source file defs: ######## + +#! @param GT_TWOLC_MAIN required, the source of phonology +#! @param GT_TWOLC_SUPPLEMENTS optional, other sources to distribute and +#! 
compile for other things +GT_PHONOLOGY_MAIN=phonology.twolc +GT_PHONOLOGY_SUPPLEMENTS= + +# vim: set ft=automake: diff --git a/src/fst/affixes/adjectives.lexc b/src/fst/morphology/affixes/adjectives.lexc similarity index 100% rename from src/fst/affixes/adjectives.lexc rename to src/fst/morphology/affixes/adjectives.lexc diff --git a/src/fst/affixes/adpositions.lexc b/src/fst/morphology/affixes/adpositions.lexc similarity index 100% rename from src/fst/affixes/adpositions.lexc rename to src/fst/morphology/affixes/adpositions.lexc diff --git a/src/fst/affixes/adverbs.lexc b/src/fst/morphology/affixes/adverbs.lexc similarity index 100% rename from src/fst/affixes/adverbs.lexc rename to src/fst/morphology/affixes/adverbs.lexc diff --git a/src/fst/affixes/clitics.lexc b/src/fst/morphology/affixes/clitics.lexc similarity index 100% rename from src/fst/affixes/clitics.lexc rename to src/fst/morphology/affixes/clitics.lexc diff --git a/src/fst/affixes/conjunctors.lexc b/src/fst/morphology/affixes/conjunctors.lexc similarity index 100% rename from src/fst/affixes/conjunctors.lexc rename to src/fst/morphology/affixes/conjunctors.lexc diff --git a/src/fst/affixes/interjections.lexc b/src/fst/morphology/affixes/interjections.lexc similarity index 100% rename from src/fst/affixes/interjections.lexc rename to src/fst/morphology/affixes/interjections.lexc diff --git a/src/fst/affixes/nouns.lexc b/src/fst/morphology/affixes/nouns.lexc similarity index 100% rename from src/fst/affixes/nouns.lexc rename to src/fst/morphology/affixes/nouns.lexc diff --git a/src/fst/affixes/numbers.lexc b/src/fst/morphology/affixes/numbers.lexc similarity index 100% rename from src/fst/affixes/numbers.lexc rename to src/fst/morphology/affixes/numbers.lexc diff --git a/src/fst/affixes/particles.lexc b/src/fst/morphology/affixes/particles.lexc similarity index 100% rename from src/fst/affixes/particles.lexc rename to src/fst/morphology/affixes/particles.lexc diff --git a/src/fst/affixes/pronouns.lexc 
b/src/fst/morphology/affixes/pronouns.lexc similarity index 100% rename from src/fst/affixes/pronouns.lexc rename to src/fst/morphology/affixes/pronouns.lexc diff --git a/src/fst/affixes/propernouns.lexc b/src/fst/morphology/affixes/propernouns.lexc similarity index 100% rename from src/fst/affixes/propernouns.lexc rename to src/fst/morphology/affixes/propernouns.lexc diff --git a/src/fst/affixes/symbols.lexc b/src/fst/morphology/affixes/symbols.lexc similarity index 100% rename from src/fst/affixes/symbols.lexc rename to src/fst/morphology/affixes/symbols.lexc diff --git a/src/fst/affixes/verbs.lexc b/src/fst/morphology/affixes/verbs.lexc similarity index 100% rename from src/fst/affixes/verbs.lexc rename to src/fst/morphology/affixes/verbs.lexc diff --git a/src/fst/compounding.lexc b/src/fst/morphology/compounding.lexc similarity index 100% rename from src/fst/compounding.lexc rename to src/fst/morphology/compounding.lexc diff --git a/src/fst/generated_files/00README.txt b/src/fst/morphology/generated_files/00README.txt similarity index 100% rename from src/fst/generated_files/00README.txt rename to src/fst/morphology/generated_files/00README.txt diff --git a/src/fst/incoming/00README.txt b/src/fst/morphology/incoming/00README.txt similarity index 100% rename from src/fst/incoming/00README.txt rename to src/fst/morphology/incoming/00README.txt diff --git a/src/fst/phonology.twolc b/src/fst/morphology/phonology.twolc similarity index 100% rename from src/fst/phonology.twolc rename to src/fst/morphology/phonology.twolc diff --git a/src/fst/root.lexc b/src/fst/morphology/root.lexc similarity index 100% rename from src/fst/root.lexc rename to src/fst/morphology/root.lexc diff --git a/src/fst/stems/A_mdf2x.lexc b/src/fst/morphology/stems/A_mdf2x.lexc similarity index 100% rename from src/fst/stems/A_mdf2x.lexc rename to src/fst/morphology/stems/A_mdf2x.lexc diff --git a/src/fst/stems/A_mdf2x.xml b/src/fst/morphology/stems/A_mdf2x.xml similarity index 100% rename from 
src/fst/stems/A_mdf2x.xml rename to src/fst/morphology/stems/A_mdf2x.xml diff --git a/src/fst/stems/Adp_mdf2x.lexc b/src/fst/morphology/stems/Adp_mdf2x.lexc similarity index 100% rename from src/fst/stems/Adp_mdf2x.lexc rename to src/fst/morphology/stems/Adp_mdf2x.lexc diff --git a/src/fst/stems/Adp_mdf2x.xml b/src/fst/morphology/stems/Adp_mdf2x.xml similarity index 100% rename from src/fst/stems/Adp_mdf2x.xml rename to src/fst/morphology/stems/Adp_mdf2x.xml diff --git a/src/fst/stems/Adv_mdf2x.lexc b/src/fst/morphology/stems/Adv_mdf2x.lexc similarity index 100% rename from src/fst/stems/Adv_mdf2x.lexc rename to src/fst/morphology/stems/Adv_mdf2x.lexc diff --git a/src/fst/stems/Adv_mdf2x.xml b/src/fst/morphology/stems/Adv_mdf2x.xml similarity index 100% rename from src/fst/stems/Adv_mdf2x.xml rename to src/fst/morphology/stems/Adv_mdf2x.xml diff --git a/src/fst/stems/CC_mdf2x.lexc b/src/fst/morphology/stems/CC_mdf2x.lexc similarity index 100% rename from src/fst/stems/CC_mdf2x.lexc rename to src/fst/morphology/stems/CC_mdf2x.lexc diff --git a/src/fst/stems/CC_mdf2x.xml b/src/fst/morphology/stems/CC_mdf2x.xml similarity index 100% rename from src/fst/stems/CC_mdf2x.xml rename to src/fst/morphology/stems/CC_mdf2x.xml diff --git a/src/fst/stems/CS_mdf2x.lexc b/src/fst/morphology/stems/CS_mdf2x.lexc similarity index 100% rename from src/fst/stems/CS_mdf2x.lexc rename to src/fst/morphology/stems/CS_mdf2x.lexc diff --git a/src/fst/stems/CS_mdf2x.xml b/src/fst/morphology/stems/CS_mdf2x.xml similarity index 100% rename from src/fst/stems/CS_mdf2x.xml rename to src/fst/morphology/stems/CS_mdf2x.xml diff --git a/src/fst/stems/Descr_mdf2x.lexc b/src/fst/morphology/stems/Descr_mdf2x.lexc similarity index 100% rename from src/fst/stems/Descr_mdf2x.lexc rename to src/fst/morphology/stems/Descr_mdf2x.lexc diff --git a/src/fst/stems/Descr_mdf2x.xml b/src/fst/morphology/stems/Descr_mdf2x.xml similarity index 100% rename from src/fst/stems/Descr_mdf2x.xml rename to 
src/fst/morphology/stems/Descr_mdf2x.xml diff --git a/src/fst/stems/Ger_mdf2x.lexc b/src/fst/morphology/stems/Ger_mdf2x.lexc similarity index 100% rename from src/fst/stems/Ger_mdf2x.lexc rename to src/fst/morphology/stems/Ger_mdf2x.lexc diff --git a/src/fst/stems/Ger_mdf2x.xml b/src/fst/morphology/stems/Ger_mdf2x.xml similarity index 100% rename from src/fst/stems/Ger_mdf2x.xml rename to src/fst/morphology/stems/Ger_mdf2x.xml diff --git a/src/fst/stems/Interj_mdf2x.lexc b/src/fst/morphology/stems/Interj_mdf2x.lexc similarity index 100% rename from src/fst/stems/Interj_mdf2x.lexc rename to src/fst/morphology/stems/Interj_mdf2x.lexc diff --git a/src/fst/stems/Interj_mdf2x.xml b/src/fst/morphology/stems/Interj_mdf2x.xml similarity index 100% rename from src/fst/stems/Interj_mdf2x.xml rename to src/fst/morphology/stems/Interj_mdf2x.xml diff --git a/src/fst/stems/N_Prop_mdf2x.lexc b/src/fst/morphology/stems/N_Prop_mdf2x.lexc similarity index 100% rename from src/fst/stems/N_Prop_mdf2x.lexc rename to src/fst/morphology/stems/N_Prop_mdf2x.lexc diff --git a/src/fst/stems/N_Prop_mdf2x.xml b/src/fst/morphology/stems/N_Prop_mdf2x.xml similarity index 100% rename from src/fst/stems/N_Prop_mdf2x.xml rename to src/fst/morphology/stems/N_Prop_mdf2x.xml diff --git a/src/fst/stems/N_mdf2x.lexc b/src/fst/morphology/stems/N_mdf2x.lexc similarity index 100% rename from src/fst/stems/N_mdf2x.lexc rename to src/fst/morphology/stems/N_mdf2x.lexc diff --git a/src/fst/stems/N_mdf2x.xml b/src/fst/morphology/stems/N_mdf2x.xml similarity index 100% rename from src/fst/stems/N_mdf2x.xml rename to src/fst/morphology/stems/N_mdf2x.xml diff --git a/src/fst/stems/Num_mdf2x.lexc b/src/fst/morphology/stems/Num_mdf2x.lexc similarity index 100% rename from src/fst/stems/Num_mdf2x.lexc rename to src/fst/morphology/stems/Num_mdf2x.lexc diff --git a/src/fst/stems/Num_mdf2x.xml b/src/fst/morphology/stems/Num_mdf2x.xml similarity index 100% rename from src/fst/stems/Num_mdf2x.xml rename to 
src/fst/morphology/stems/Num_mdf2x.xml diff --git a/src/fst/stems/Pcle_mdf2x.lexc b/src/fst/morphology/stems/Pcle_mdf2x.lexc similarity index 100% rename from src/fst/stems/Pcle_mdf2x.lexc rename to src/fst/morphology/stems/Pcle_mdf2x.lexc diff --git a/src/fst/stems/Pcle_mdf2x.xml b/src/fst/morphology/stems/Pcle_mdf2x.xml similarity index 100% rename from src/fst/stems/Pcle_mdf2x.xml rename to src/fst/morphology/stems/Pcle_mdf2x.xml diff --git a/src/fst/stems/Prc_mdf2x.lexc b/src/fst/morphology/stems/Prc_mdf2x.lexc similarity index 100% rename from src/fst/stems/Prc_mdf2x.lexc rename to src/fst/morphology/stems/Prc_mdf2x.lexc diff --git a/src/fst/stems/Prc_mdf2x.xml b/src/fst/morphology/stems/Prc_mdf2x.xml similarity index 100% rename from src/fst/stems/Prc_mdf2x.xml rename to src/fst/morphology/stems/Prc_mdf2x.xml diff --git a/src/fst/stems/Pron_mdf2x.lexc b/src/fst/morphology/stems/Pron_mdf2x.lexc similarity index 100% rename from src/fst/stems/Pron_mdf2x.lexc rename to src/fst/morphology/stems/Pron_mdf2x.lexc diff --git a/src/fst/stems/Pron_mdf2x.xml b/src/fst/morphology/stems/Pron_mdf2x.xml similarity index 100% rename from src/fst/stems/Pron_mdf2x.xml rename to src/fst/morphology/stems/Pron_mdf2x.xml diff --git a/src/fst/stems/V_mdf2x.lexc b/src/fst/morphology/stems/V_mdf2x.lexc similarity index 100% rename from src/fst/stems/V_mdf2x.lexc rename to src/fst/morphology/stems/V_mdf2x.lexc diff --git a/src/fst/stems/V_mdf2x.xml b/src/fst/morphology/stems/V_mdf2x.xml similarity index 100% rename from src/fst/stems/V_mdf2x.xml rename to src/fst/morphology/stems/V_mdf2x.xml diff --git a/src/fst/stems/abbreviations.lexc b/src/fst/morphology/stems/abbreviations.lexc similarity index 100% rename from src/fst/stems/abbreviations.lexc rename to src/fst/morphology/stems/abbreviations.lexc diff --git a/src/fst/stems/acronyms.lexc b/src/fst/morphology/stems/acronyms.lexc similarity index 100% rename from src/fst/stems/acronyms.lexc rename to 
src/fst/morphology/stems/acronyms.lexc diff --git a/src/fst/stems/adjectives-mwe.xml b/src/fst/morphology/stems/adjectives-mwe.xml similarity index 100% rename from src/fst/stems/adjectives-mwe.xml rename to src/fst/morphology/stems/adjectives-mwe.xml diff --git a/src/fst/stems/adjectives-russian-like_newwords.lexc b/src/fst/morphology/stems/adjectives-russian-like_newwords.lexc similarity index 100% rename from src/fst/stems/adjectives-russian-like_newwords.lexc rename to src/fst/morphology/stems/adjectives-russian-like_newwords.lexc diff --git a/src/fst/stems/adjectives_newwords.lexc b/src/fst/morphology/stems/adjectives_newwords.lexc similarity index 100% rename from src/fst/stems/adjectives_newwords.lexc rename to src/fst/morphology/stems/adjectives_newwords.lexc diff --git a/src/fst/stems/adverbs-mwe.xml b/src/fst/morphology/stems/adverbs-mwe.xml similarity index 100% rename from src/fst/stems/adverbs-mwe.xml rename to src/fst/morphology/stems/adverbs-mwe.xml diff --git a/src/fst/stems/exceptions.lexc b/src/fst/morphology/stems/exceptions.lexc similarity index 100% rename from src/fst/stems/exceptions.lexc rename to src/fst/morphology/stems/exceptions.lexc diff --git a/src/fst/stems/gt_dictionary.dtd b/src/fst/morphology/stems/gt_dictionary.dtd similarity index 100% rename from src/fst/stems/gt_dictionary.dtd rename to src/fst/morphology/stems/gt_dictionary.dtd diff --git a/src/fst/stems/interjections-mwe.xml b/src/fst/morphology/stems/interjections-mwe.xml similarity index 100% rename from src/fst/stems/interjections-mwe.xml rename to src/fst/morphology/stems/interjections-mwe.xml diff --git a/src/fst/stems/nouns-mwe.xml b/src/fst/morphology/stems/nouns-mwe.xml similarity index 100% rename from src/fst/stems/nouns-mwe.xml rename to src/fst/morphology/stems/nouns-mwe.xml diff --git a/src/fst/stems/nouns-russian-homographs_newwords.lexc b/src/fst/morphology/stems/nouns-russian-homographs_newwords.lexc similarity index 100% rename from 
src/fst/stems/nouns-russian-homographs_newwords.lexc rename to src/fst/morphology/stems/nouns-russian-homographs_newwords.lexc diff --git a/src/fst/stems/nouns_newwords.lexc b/src/fst/morphology/stems/nouns_newwords.lexc similarity index 100% rename from src/fst/stems/nouns_newwords.lexc rename to src/fst/morphology/stems/nouns_newwords.lexc diff --git a/src/fst/stems/phrase_adverbs.xml b/src/fst/morphology/stems/phrase_adverbs.xml similarity index 100% rename from src/fst/stems/phrase_adverbs.xml rename to src/fst/morphology/stems/phrase_adverbs.xml diff --git a/src/fst/stems/phrase_nouns.xml b/src/fst/morphology/stems/phrase_nouns.xml similarity index 100% rename from src/fst/stems/phrase_nouns.xml rename to src/fst/morphology/stems/phrase_nouns.xml diff --git a/src/fst/stems/propernouns_newwords.lexc b/src/fst/morphology/stems/propernouns_newwords.lexc similarity index 100% rename from src/fst/stems/propernouns_newwords.lexc rename to src/fst/morphology/stems/propernouns_newwords.lexc diff --git a/src/fst/stems/verbs-mwe.xml b/src/fst/morphology/stems/verbs-mwe.xml similarity index 100% rename from src/fst/stems/verbs-mwe.xml rename to src/fst/morphology/stems/verbs-mwe.xml diff --git a/src/fst/stems/verbs_newwords.lexc b/src/fst/morphology/stems/verbs_newwords.lexc similarity index 100% rename from src/fst/stems/verbs_newwords.lexc rename to src/fst/morphology/stems/verbs_newwords.lexc diff --git a/src/orthography/Makefile.am b/src/fst/orthography/Makefile.am similarity index 100% rename from src/orthography/Makefile.am rename to src/fst/orthography/Makefile.am diff --git a/src/orthography/allcaps.xfscript b/src/fst/orthography/allcaps.xfscript similarity index 100% rename from src/orthography/allcaps.xfscript rename to src/fst/orthography/allcaps.xfscript diff --git a/src/orthography/downcase-derived_proper-strings.xfscript b/src/fst/orthography/downcase-derived_proper-strings.xfscript similarity index 100% rename from 
src/orthography/downcase-derived_proper-strings.xfscript rename to src/fst/orthography/downcase-derived_proper-strings.xfscript diff --git a/src/orthography/inituppercase.regex b/src/fst/orthography/inituppercase.regex similarity index 100% rename from src/orthography/inituppercase.regex rename to src/fst/orthography/inituppercase.regex diff --git a/src/orthography/spellrelax-mobile-keyboard.regex b/src/fst/orthography/spellrelax-mobile-keyboard.regex similarity index 100% rename from src/orthography/spellrelax-mobile-keyboard.regex rename to src/fst/orthography/spellrelax-mobile-keyboard.regex diff --git a/src/orthography/spellrelax-tags.regex b/src/fst/orthography/spellrelax-tags.regex similarity index 100% rename from src/orthography/spellrelax-tags.regex rename to src/fst/orthography/spellrelax-tags.regex diff --git a/src/orthography/spellrelax-with-tags.xfscript b/src/fst/orthography/spellrelax-with-tags.xfscript similarity index 100% rename from src/orthography/spellrelax-with-tags.xfscript rename to src/fst/orthography/spellrelax-with-tags.xfscript diff --git a/src/orthography/spellrelax.regex b/src/fst/orthography/spellrelax.regex similarity index 100% rename from src/orthography/spellrelax.regex rename to src/fst/orthography/spellrelax.regex diff --git a/src/phonetics/Makefile.am b/src/fst/phonetics/Makefile.am similarity index 100% rename from src/phonetics/Makefile.am rename to src/fst/phonetics/Makefile.am diff --git a/src/phonetics/tests/Makefile.am b/src/fst/phonetics/tests/Makefile.am similarity index 100% rename from src/phonetics/tests/Makefile.am rename to src/fst/phonetics/tests/Makefile.am diff --git a/src/phonetics/tests/run_tests.sh.in b/src/fst/phonetics/tests/run_tests.sh.in similarity index 100% rename from src/phonetics/tests/run_tests.sh.in rename to src/fst/phonetics/tests/run_tests.sh.in diff --git a/src/fst/phonetics/tests/tests/Makefile.am b/src/fst/phonetics/tests/tests/Makefile.am new file mode 100644 index 00000000..dc17381e --- 
/dev/null +++ b/src/fst/phonetics/tests/tests/Makefile.am @@ -0,0 +1,19 @@ +## Process this file with automake to produce Makefile.in +## Copyright: Sámediggi/Divvun/UiT +## Licence: GPL v3+ + +######## Test targets: ########### + +if WANT_PHONETIC + +# List here (space separated) all yaml files to be run as part of make check: +TESTS= + +# List tests that are presently (expected) failures here, ie things that should +# be fixed *later*, but is not critical at the moment: +XFAIL_TESTS= + +endif # WANT_PHONETIC + +#### Do NOT edit below here: #### +include $(top_srcdir)/../giella-core/am-shared/src-phonetics-tests-dir-include.am diff --git a/src/fst/phonetics/tests/tests/run_tests.sh.in b/src/fst/phonetics/tests/tests/run_tests.sh.in new file mode 100644 index 00000000..baaa6f84 --- /dev/null +++ b/src/fst/phonetics/tests/tests/run_tests.sh.in @@ -0,0 +1,89 @@ +#!/bin/bash +## Process this file with configure to produce the actual shell script +## Copyright: Sámediggi/Divvun/UiT +## Licence: GPL v3+ + +# Test runner to test conversion to IPA. + +# Use autotools mechanisms to only run the configured fst types in the tests: +fsttype= +@CAN_HFST_TRUE@fsttype="$fsttype hfst" +@CAN_XFST_TRUE@fsttype="$fsttype xfst" +@CAN_FOMA_TRUE@fsttype="$fsttype foma" + +# Exit if all fst types have been shut off: +if [[ "x$fsttype" == "x" ]]; then + echo "All transducer types have been shut off at configure time." + echo "Nothing to test. SKIPPED." + exit 77 +fi + +fst_num=$(echo "$fsttype" | wc -w) +# Debug: echo Number of fst´s: $fst_num + +fst=$(grep -v '^#' $1 | grep -v '^\s*$' | grep 'fst' | cut -f2) + +# Debug: +# echo FST: $fst + +grep -v '^#' $1 | grep -v '^\s*$' | tail -n +2 | cut -f1 > innput.txt +grep -v '^#' $1 | grep -v '^\s*$' | tail -n +2 | cut -f2 > expect.txt + +###### Start testing: ####### +transducer_found=0 +fails=0 + +# .---------- constant part!
+# vvvv vvvv-- colour code +RED='\033[0;31m' +GREEN='\033[0;32m' +BOLD='\033[1m' +NC='\033[0m' # No Color + + +# Loop over the transducer types first - we test both hfst, xfst and foma +# according to the configuration: +for f in $fsttype; do + # DEBUG: echo "Fst loop 35: $f" + if test $f == "xfst"; then + lookuptool="@LOOKUP@ -flags mbTT" + elif test $f == "foma"; then + lookuptool="@FLOOKUP@" + elif test $f == "hfst"; then + lookuptool="@HFST_LOOKUP@ -q" + else + fails=$((fails+1)) + echo "FAIL: Unknown fst type! FST=$f" + continue + fi + # Run lookup, then clean the output as follows before saving: + # 1. remove extra empty lines + # 2. convert two or more newlines to XXYYZZ (two newlines are cohort separators) + # 3. convert single newlines to ' XXXX ' - marks multiple outputs from single input + # 4. convert XXYYZZ back to a single newline + # 5. cleanup on the last line + $lookuptool ../$fst.$f < innput.txt | cut -f1-2 \ + | sed '${/^[[:space:]]*$/d;}' \ + | perl -0pe 's/\n\n+/XXYYZZ/g;' \ + | perl -0pe 's/\n/ XXXX /g;' \ + | perl -pe 's/XXYYZZ/\n/g' \ + | sed '$ s/ XXXX $/\n/' > output.${f}.txt + + # The actual test - compare against the output of the fst type under test: + diff expect.txt <(cut -f2- output.${f}.txt) + + # Just to be sure, capture the output value - it might give different + # results for hfst, foma and xfst (that would be a bug in the offending + # one, but one never knows). This way the test will fail even if the last + # fst type being tested was successful if one of the earlier types failed. + # + # And if more than one fst type is tested, print output status for each: + if (($? 
> 0)) ; then + fails=$((fails+1)) + (($fst_num > 1)) && echo -e " ${BOLD}FAILED: ${RED}$f${NC}" + else + (($fst_num > 1)) && echo -e " ${BOLD}PASSED: ${GREEN}$f${NC}" + fi +done + +exit $fails diff --git a/src/phonetics/txt2ipa.xfscript b/src/fst/phonetics/txt2ipa.xfscript similarity index 100% rename from src/phonetics/txt2ipa.xfscript rename to src/fst/phonetics/txt2ipa.xfscript diff --git a/src/hyphenation/Makefile.am b/src/fst/syllabification/Makefile.am similarity index 100% rename from src/hyphenation/Makefile.am rename to src/fst/syllabification/Makefile.am diff --git a/src/hyphenation/hyphenation.xfscript b/src/fst/syllabification/hyphenation.xfscript similarity index 100% rename from src/hyphenation/hyphenation.xfscript rename to src/fst/syllabification/hyphenation.xfscript diff --git a/src/tagsets/Makefile.am b/src/fst/tagsets/Makefile.am similarity index 100% rename from src/tagsets/Makefile.am rename to src/fst/tagsets/Makefile.am diff --git a/src/transcriptions/Makefile.am b/src/fst/transcriptions/Makefile.am similarity index 100% rename from src/transcriptions/Makefile.am rename to src/fst/transcriptions/Makefile.am diff --git a/src/transcriptions/transcriptor-abbrevs2text.lexc b/src/fst/transcriptions/transcriptor-abbrevs2text.lexc similarity index 100% rename from src/transcriptions/transcriptor-abbrevs2text.lexc rename to src/fst/transcriptions/transcriptor-abbrevs2text.lexc diff --git a/src/transcriptions/transcriptor-clock-digit2text.lexc b/src/fst/transcriptions/transcriptor-clock-digit2text.lexc similarity index 100% rename from src/transcriptions/transcriptor-clock-digit2text.lexc rename to src/fst/transcriptions/transcriptor-clock-digit2text.lexc diff --git a/src/transcriptions/transcriptor-date-digit2text.lexc b/src/fst/transcriptions/transcriptor-date-digit2text.lexc similarity index 100% rename from src/transcriptions/transcriptor-date-digit2text.lexc rename to src/fst/transcriptions/transcriptor-date-digit2text.lexc diff --git 
a/src/transcriptions/transcriptor-numbers-digit2text.lexc b/src/fst/transcriptions/transcriptor-numbers-digit2text.lexc similarity index 100% rename from src/transcriptions/transcriptor-numbers-digit2text.lexc rename to src/fst/transcriptions/transcriptor-numbers-digit2text.lexc diff --git a/test/src/morphology/generate-adjective-lemmas.sh.in b/test/src/morphology/generate-adjective-lemmas.sh.in index 727fb868..a6c7704c 100755 --- a/test/src/morphology/generate-adjective-lemmas.sh.in +++ b/test/src/morphology/generate-adjective-lemmas.sh.in @@ -23,9 +23,9 @@ POS=A_mdf2x #POS=adjectives ### in ### -source_file=${srcdir}/../../../src/fst/stems/${POS}.lexc -generator_file=./../../../src/generator-gt-norm -analyser_file=./../../../src/analyser-gt-norm +source_file=${srcdir}/../../../src/fst/morphology/stems/${POS}.lexc +generator_file=./../../../src/fst/generator-gt-norm +analyser_file=./../../../src/fst/analyser-gt-norm ### out ### # Temporary files: diff --git a/test/src/morphology/generate-noun-lemmas.sh.in b/test/src/morphology/generate-noun-lemmas.sh.in index dee51851..91318dd1 100755 --- a/test/src/morphology/generate-noun-lemmas.sh.in +++ b/test/src/morphology/generate-noun-lemmas.sh.in @@ -23,9 +23,9 @@ POS=N_mdf2x #POS=nouns ### in ### -source_file=${srcdir}/../../../src/fst/stems/${POS}.lexc -generator_file=./../../../src/generator-gt-norm -analyser_file=./../../../src/analyser-gt-norm +source_file=${srcdir}/../../../src/fst/morphology/stems/${POS}.lexc +generator_file=./../../../src/fst/generator-gt-norm +analyser_file=./../../../src/fst/analyser-gt-norm ### out ### # Temporary files: diff --git a/test/src/morphology/generate-propernoun-lemmas.sh.in b/test/src/morphology/generate-propernoun-lemmas.sh.in index 8bd4d4a3..99bb5c6d 100644 --- a/test/src/morphology/generate-propernoun-lemmas.sh.in +++ b/test/src/morphology/generate-propernoun-lemmas.sh.in @@ -23,9 +23,9 @@ POS=N_Prop_mdf2x #POS=propernouns ### in ### 
-source_file=${srcdir}/../../../src/fst/stems/${POS}.lexc -generator_file=./../../../src/generator-gt-norm -analyser_file=./../../../src/analyser-gt-norm +source_file=${srcdir}/../../../src/fst/morphology/stems/${POS}.lexc +generator_file=./../../../src/fst/generator-gt-norm +analyser_file=./../../../src/fst/analyser-gt-norm ### out ### # Temporary files: diff --git a/test/src/morphology/generate-verb-lemmas.sh.in b/test/src/morphology/generate-verb-lemmas.sh.in index 618f8834..3bb8430f 100644 --- a/test/src/morphology/generate-verb-lemmas.sh.in +++ b/test/src/morphology/generate-verb-lemmas.sh.in @@ -23,9 +23,9 @@ POS=V_mdf2x #POS=verbs ### in ### -source_file=${srcdir}/../../../src/fst/stems/${POS}.lexc -generator_file=./../../../src/generator-gt-norm -analyser_file=./../../../src/analyser-gt-norm +source_file=${srcdir}/../../../src/fst/morphology/stems/${POS}.lexc +generator_file=./../../../src/fst/generator-gt-norm +analyser_file=./../../../src/fst/analyser-gt-norm ### out ### # Temporary files: diff --git a/test/tools/spellcheckers/fstbased/desktop/hfst/accept-all-lemmas.sh.in b/test/tools/spellcheckers/fstbased/desktop/hfst/accept-all-lemmas.sh.in index 2155c83f..eceb38a2 100755 --- a/test/tools/spellcheckers/fstbased/desktop/hfst/accept-all-lemmas.sh.in +++ b/test/tools/spellcheckers/fstbased/desktop/hfst/accept-all-lemmas.sh.in @@ -17,7 +17,7 @@ GIELLA_LANG=@GTLANG2@ ospell=@HFST_OSPELL@ ### in ### -source_files=${srcdir}/../../../../../../src/fst/stems/*.lexc +source_files=${srcdir}/../../../../../../src/fst/morphology/stems/*.lexc speller_dir=./../../../../../../tools/spellcheckers ### out ###