From 85820a7bbe8f83efa2b8e086a40030a6edab1670 Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Wed, 4 Oct 2023 15:03:53 +0200 Subject: [PATCH] [Template Merge] filters for tts build --- src/filters/Makefile.am.orig | 55 +++++++++++++++++++ src/filters/Makefile.am.rej | 15 +++++ src/filters/remove-DNorm-tags.regex | 18 ++++++ .../remove-derivation-position-tags.regex | 10 ++++ src/filters/remove-norm-comp-tags.regex | 23 ++++++++ src/filters/rename-POS_before_Der-tags.regex | 32 +++++++++++ 6 files changed, 153 insertions(+) create mode 100644 src/filters/Makefile.am.orig create mode 100644 src/filters/Makefile.am.rej create mode 100644 src/filters/remove-DNorm-tags.regex create mode 100644 src/filters/remove-derivation-position-tags.regex create mode 100644 src/filters/remove-norm-comp-tags.regex create mode 100644 src/filters/rename-POS_before_Der-tags.regex diff --git a/src/filters/Makefile.am.orig b/src/filters/Makefile.am.orig new file mode 100644 index 00000000..4100d44a --- /dev/null +++ b/src/filters/Makefile.am.orig @@ -0,0 +1,55 @@ +## Process this file with automake to produce Makefile.in + +## Copyright (C) 2011 Samediggi + +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation, either version 3 of the License, or +## (at your option) any later version. + +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +## You should have received a copy of the GNU General Public License +## along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +######################################################### +############## BEGIN: Local modifications ############### + +# List any local filter regex files here: +GIELLA_FILTER_LOCAL_REGEX_SRCS=\ + downcase_UCletters.regex\ + allow_uppercase.regex\ + remove-bracketed-english-gloss.regex + +# List any local filter xfscript files here: +GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS= + +# List any local filter lexc files here: +GIELLA_FILTER_LOCAL_LEXC_SRCS= + +# List any locally generated regex source files here: +GIELLA_FILTER_LOCAL_GENERATED_REGEX_SRCS= + +# List any locally generated xfscript source files here: +GIELLA_FILTER_LOCAL_GENERATED_XFSCRIPT_SRCS= + +# List any locally generated lexc source files here: +GIELLA_FILTER_LOCAL_GENERATED_LEXC_SRCS= + +# List any additional source files here, so that they are included in the dist. +# Source files that are not directly compiled to fst's but are instead used as +# part of a local build step should be listed here. +EXTRA_SRCS= + +########## Add local build rules below here: ############ + +############### END: Local modifications ################ +######################################################### + +# Included build file, where the actual build instructions are: +include $(top_srcdir)/../giella-core/am-shared/src-filters-dir-include.am + +# vim: set ft=automake: diff --git a/src/filters/Makefile.am.rej b/src/filters/Makefile.am.rej new file mode 100644 index 00000000..5ef8d2a4 --- /dev/null +++ b/src/filters/Makefile.am.rej @@ -0,0 +1,15 @@ +--- src/filters/Makefile.am ++++ src/filters/Makefile.am +@@ -20,7 +20,11 @@ + + # List any local filter regex files here: + GIELLA_FILTER_LOCAL_REGEX_SRCS=\ +- downcase_UCletters.regex ++ downcase_UCletters.regex\ ++ remove-derivation-position-tags.regex \ ++ remove-DNorm-tags.regex \ ++ remove-norm-comp-tags.regex \ ++ rename-POS_before_Der-tags.regex + + # List any local filter xfscript files here: + GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS= diff --git 
a/src/filters/remove-DNorm-tags.regex b/src/filters/remove-DNorm-tags.regex new file mode 100644 index 00000000..456f3ed6 --- /dev/null +++ b/src/filters/remove-DNorm-tags.regex @@ -0,0 +1,18 @@ +! Divvun & Giellatekno - open source grammars for Sámi and other languages +! Copyright © 2000-2010 The University of Tromsø & the Norwegian Sámi Parliament +! http://giellatekno.uit.no & http://divvun.no +! +! This program is free software; you can redistribute and/or modify +! this file under the terms of the GNU General Public License as published by +! the Free Software Foundation, either version 3 of the License, or +! (at your option) any later version. The GNU General Public License +! is found at http://www.gnu.org/licenses/gpl.html. It is +! also available in the file $GTHOME/LICENSE.txt. +! +! Other licensing options are available upon request, please contact +! giellatekno@uit.no or feedback@divvun.no + +# This filter removes the +Use/NotDNorm and +Use/DNorm tags. + +0 <- %+Use%/NotDNorm, +0 <- %+Use%/DNorm; diff --git a/src/filters/remove-derivation-position-tags.regex b/src/filters/remove-derivation-position-tags.regex new file mode 100644 index 00000000..7f86484e --- /dev/null +++ b/src/filters/remove-derivation-position-tags.regex @@ -0,0 +1,10 @@ +# This regex removes tags we do not want in the ordinary $(GTLANG).fst +# That is, we do not give them as output from analysis + +# delete the tags governing normative/speller derivation restrictions +0 <- %+Der, +0 <- %+Der1, +0 <- %+Der2, +0 <- %+Der3, +0 <- %+Der4, +0 <- %+Der5; diff --git a/src/filters/remove-norm-comp-tags.regex b/src/filters/remove-norm-comp-tags.regex new file mode 100644 index 00000000..8f83ca91 --- /dev/null +++ b/src/filters/remove-norm-comp-tags.regex @@ -0,0 +1,23 @@ +# This regex removes tags we do not want in the ordinary $(GTLANG).fst +# That is, we do not give them as output from analysis + +# delete normative tags governing compound behaviour +# -N- in the tag name stands for Normative +0 <- 
%+CmpNP%/All, +0 <- %+CmpNP%/First, +0 <- %+CmpNP%/Pref, +0 <- %+CmpNP%/Last, +0 <- %+CmpNP%/Suff, +0 <- %+CmpNP%/None, +0 <- %+CmpNP%/Only, +0 <- %+CmpN%/SgN, +0 <- %+CmpN%/SgG, +0 <- %+CmpN%/PlG, +0 <- %+CmpN%/SgNomLeft, +0 <- %+CmpN%/SgGenLeft, +0 <- %+CmpN%/PlGenLeft, +0 <- %+CmpN%/Def, +0 <- %+CmpN%/DefSgNom, +0 <- %+CmpN%/DefSgGen, +0 <- %+CmpN%/DefPlGen +; diff --git a/src/filters/rename-POS_before_Der-tags.regex b/src/filters/rename-POS_before_Der-tags.regex new file mode 100644 index 00000000..9395a0c5 --- /dev/null +++ b/src/filters/rename-POS_before_Der-tags.regex @@ -0,0 +1,32 @@ +# This script renames POS tags when in front of derivations, such that +# only the final POS tag remains. This allows disambiguation to work +# properly also for derived words, without having to resort to post-processing +# hacks. The script is language specific. + +# POS tags changes: +[ "+Ex/Adv" <- "+Adv" , + "+Ex/N" <- "+N" , + "+Ex/A" <- "+A" , + "+Ex/Attr" <- "+Attr", + "+Ex/V" <- "+V" || +# change always and only when followed by a derivation: + _ \[ "+Cmp" ]* + [ "+Der" + | "+Der1" + | "+Der2" + | "+Der3" + | "+Der4" + | "+Der5" + ] +] .o. + +# Transitivity tag changes: +[ "+Ex/TV" <- "+TV" , + "+Ex/IV" <- "+IV" || +# change only within the same stem, and only when followed by +# a verbal derivation: + _ \[ "+Cmp" ]* + [ "+TV" + | "+IV" + ] +] ;