Skip to content

Commit

Permalink
[Template Merge] filters for tts build
Browse files Browse the repository at this point in the history
  • Loading branch information
flammie committed Oct 4, 2023
1 parent 000546b commit 85820a7
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 0 deletions.
55 changes: 55 additions & 0 deletions src/filters/Makefile.am.orig
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
## Process this file with automake to produce Makefile.in

## Copyright (C) 2011 Samediggi

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.

#########################################################
############## BEGIN: Local modifications ###############

# Local filter regex sources, one file name per line:
GIELLA_FILTER_LOCAL_REGEX_SRCS = \
    downcase_UCletters.regex \
    allow_uppercase.regex \
    remove-bracketed-english-gloss.regex

# Local filter xfscript sources (none at present):
GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS =

# Local filter lexc sources (none at present):
GIELLA_FILTER_LOCAL_LEXC_SRCS =

# Regex sources that are generated locally rather than written by hand:
GIELLA_FILTER_LOCAL_GENERATED_REGEX_SRCS =

# Xfscript sources that are generated locally rather than written by hand:
GIELLA_FILTER_LOCAL_GENERATED_XFSCRIPT_SRCS =

# Lexc sources that are generated locally rather than written by hand:
GIELLA_FILTER_LOCAL_GENERATED_LEXC_SRCS =

# Additional source files that must be included in the dist. List here any
# source file that is not compiled directly to an fst but is instead used as
# part of a local build step.
EXTRA_SRCS =

########## Add local build rules below here: ############

############### END: Local modifications ################
#########################################################

# Included build file, where the actual build instructions are.
# The path is resolved relative to $(top_srcdir), so the giella-core
# directory is expected to sit next to this project's top source directory.
# NOTE(review): the variables defined above are presumably consumed by this
# include -- confirm against src-filters-dir-include.am.
include $(top_srcdir)/../giella-core/am-shared/src-filters-dir-include.am

# vim: set ft=automake:
15 changes: 15 additions & 0 deletions src/filters/Makefile.am.rej
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
--- src/filters/Makefile.am
+++ src/filters/Makefile.am
@@ -20,7 +20,11 @@

# List any local filter regex files here:
GIELLA_FILTER_LOCAL_REGEX_SRCS=\
- downcase_UCletters.regex
+ downcase_UCletters.regex\
+ remove-derivation-position-tags.regex \
+ remove-DNorm-tags.regex \
+ remove-norm-comp-tags.regex \
+ rename-POS_before_Der-tags.regex

# List any local filter xfscript files here:
GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS=
18 changes: 18 additions & 0 deletions src/filters/remove-DNorm-tags.regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
! Divvun & Giellatekno - open source grammars for Sámi and other languages
! Copyright © 2000-2010 The University of Tromsø & the Norwegian Sámi Parliament
! http://giellatekno.uit.no & http://divvun.no
!
! This program is free software; you can redistribute and/or modify
! this file under the terms of the GNU General Public License as published by
! the Free Software Foundation, either version 3 of the License, or
! (at your option) any later version. The GNU General Public License
! is found at http://www.gnu.org/licenses/gpl.html. It is
! also available in the file $GTHOME/LICENSE.txt.
!
! Other licensing options are available upon request, please contact
! giellatekno@uit.no or feedback@divvun.no

# This filter removes the DNorm usage tags: +Use/NotDNorm and +Use/DNorm.
# The two comma-separated rules are applied in parallel; each one deletes
# one of the tags (replaces it with 0, i.e. nothing).

0 <- %+Use%/NotDNorm,
0 <- %+Use%/DNorm;
10 changes: 10 additions & 0 deletions src/filters/remove-derivation-position-tags.regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Filter for the ordinary $(GTLANG).fst: the tags listed below are not wanted
# there, i.e. they must not be given as output from analysis.

# The tags governing normative/speller derivation restrictions are deleted.
# A single deletion rule over the union of all six tags is used:
0 <- [ %+Der
     | %+Der1
     | %+Der2
     | %+Der3
     | %+Der4
     | %+Der5
     ] ;
23 changes: 23 additions & 0 deletions src/filters/remove-norm-comp-tags.regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Filter for the ordinary $(GTLANG).fst: the tags listed below are not wanted
# there, i.e. they must not be given as output from analysis.

# The normative tags governing compound behaviour are deleted.
# The -N- in the tag names stands for Normative.
# A single deletion rule over the union of all the tags is used:
0 <- [
       # +CmpNP/ tags:
       %+CmpNP%/All
     | %+CmpNP%/First
     | %+CmpNP%/Pref
     | %+CmpNP%/Last
     | %+CmpNP%/Suff
     | %+CmpNP%/None
     | %+CmpNP%/Only
       # +CmpN/ tags:
     | %+CmpN%/SgN
     | %+CmpN%/SgG
     | %+CmpN%/PlG
     | %+CmpN%/SgNomLeft
     | %+CmpN%/SgGenLeft
     | %+CmpN%/PlGenLeft
     | %+CmpN%/Def
     | %+CmpN%/DefSgNom
     | %+CmpN%/DefSgGen
     | %+CmpN%/DefPlGen
     ] ;
32 changes: 32 additions & 0 deletions src/filters/rename-POS_before_Der-tags.regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# This script renames POS tags when in front of derivations, such that
# only the final POS tag remains. This allows disambiguation to work
# properly also for derived words, without having to resort to post-processing
# hacks. The script is language specific.
#
# Structure: two bracketed rule sets joined by composition (.o.). Within each
# set the comma-separated replacements are parallel and share the context
# that follows the || operator.

# POS tags changes:
# (each plain POS tag is paired with the corresponding "+Ex/"-prefixed tag)
[ "+Ex/Adv" <- "+Adv" ,
"+Ex/N" <- "+N" ,
"+Ex/A" <- "+A" ,
"+Ex/Attr" <- "+Attr",
"+Ex/V" <- "+V" ||
# change always and only when followed by a derivation:
# the derivation tag may come after any number of symbols other than "+Cmp",
# i.e. no "+Cmp" symbol may stand between the POS tag and the derivation tag.
_ \[ "+Cmp" ]*
[ "+Der"
| "+Der1"
| "+Der2"
| "+Der3"
| "+Der4"
| "+Der5"
]
] .o.

# Transitivity tag changes:
[ "+Ex/TV" <- "+TV" ,
"+Ex/IV" <- "+IV" ||
# change only within the same stem, and only when followed by
# a verbal derivation:
# as written, the right context requires a later "+TV" or "+IV" tag with no
# "+Cmp" symbol in between; it does not mention the "+Der" tags directly.
_ \[ "+Cmp" ]*
[ "+TV"
| "+IV"
]
] ;

0 comments on commit 85820a7

Please sign in to comment.