-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Created a XFSCRIPT for demoing morpheme boundaries.
- Loading branch information
Showing
1 changed file
with
154 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
# Demo script on morpheme boundary marking | ||
|
||
read lexc morphology/stems/verb_stems.lexc | ||
define Stems | ||
|
||
read lexc morphology/affixes/verb_inner_affixes.lexc | ||
define InnerAffixes | ||
|
||
read lexc morphology/affixes/verb_middle_affixes.lexc | ||
define MiddleAffixes | ||
|
||
read lexc morphology/affixes/verb_outer_affixes.lexc | ||
define OuterAffixes | ||
|
||
read lexc morphology/affixes/verb_oblique_affixes.lexc | ||
define ObliqueAffixes | ||
|
||
read lexc morphology/affixes/postverbal_affixes.lexc | ||
define PostverbalAffixes | ||
|
||
# Mark prefix type (insert corresponding flags to the | ||
# original prefixes we know to be present from the lexical entry. | ||
# ab=cd_ef.gh => ab@P.PREFIX.OUTER@=cd@P.PREFIX.MIDDLE@_ef@P.PREFIX.INNER@.gh | ||
|
||
define MarkPrefixes ~$["@P.PREFIX.OUTER@"|"@P.PREFIX.INNER@"|"@P.PREFIX.MIDDLE@"] .o. | ||
"=" -> "@P.PREFIX.OUTER@" "=" , | ||
"_" -> "@P.PREFIX.MIDDLE@" "_" , | ||
"." -> "@P.PREFIX.INNER@" "."; | ||
|
||
# Insert . (inner), _ (middle), and = (outer) if missing in the intermediate rep: | ||
# tsiy > =_.tsiy | ||
# ts'á=zíd > ts'á=_.zíd | ||
# tsí=di.tł'á > tsí=_di.tł'á | ||
# gu.blah > =_gu.blah | ||
# gu_blah > =gu_.blah | ||
|
||
# (1) No . > insert . after last marker (= or _), or in the beginning if none exists | ||
# (2) No _ > insert _ (a) after =, if one exists, or (b) beginning | ||
# (3) No = > insert at beginning | ||
|
||
define InsInner [..] -> "." || "_" _ ~$"." .#. .o. | ||
[..] -> "." || "=" _ ~$"." .#. .o. | ||
[..] -> "." || .#. _ ~$"." .#. ; | ||
|
||
define InsMiddle [..] -> "_" || "=" _ ~$["_"] .#. .o. | ||
[..] -> "_" || .#. _ ~$["_"] .#. ; | ||
|
||
define InsOuter [..] -> "=" || .#. _ ~$["="] .#. ; | ||
|
||
# Some lexical entries include a "^L" immediately before the inner-prefix | ||
# boundary marker "." (e.g., xá=_^L.ʔò "take it [solid object] out", or | ||
# ta=_di^L.ʔò "lift/pick/hold it [solid object] up"), with or without | ||
# any other inner prefixes present. In these (rare) cases, Tsuut'ina uses | ||
# middle prefix TAMA allomorphs, rather than outer or inner prefix forms, e.g. | ||
# | ||
# 2SG xáaʔò "you (sg.) will carry it (solid object) past" | ||
# (not *xaniʔò or something similar if 0-IPFV outer or no | ||
# preceding prefix allomorphs were used) | ||
# 1PL xáasaàʔò "we will carry it (solid object) past" | ||
# (not *xaàʔò or something similar if 0-IPFV inner prefix | ||
# allomorphs were used) | ||
# | ||
# 2SG tadiʔò "you (sg.) will lift it (solid object) up" | ||
# (not *tadiniʔò if 0-IPFV outer or no preceding prefix | ||
# allomorphs were used) | ||
# 1PL tadìsaàʔò "we will lift it (solid object) up" | ||
# (not *tadaàʔò or something similar if 0-IPFV inner | ||
# prefix allomorphs were used) | ||
# | ||
# This rule therefore turns @P.PREFIX.INNER@ into @P.PREFIX.MIDDLE@ to ensure | ||
# that middle-prefix allomorphs are used, then sets an additional flag (@P. | ||
# LOWTONE.ON@) to help differentiate between this situation and all other | ||
# middle-prefix contexts (for use in 'affixes/verb_inner_affixes.lexc'). | ||
define LInnerPrefixAllomorphs "@P.PREFIX.INNER@" -> | ||
"@P.PREFIX.MIDDLE@" "@P.LOWTONE.ON@" || "^L" _ ; | ||
|
||
# Some middle and inner prefixes (e.g., middle íH- conative/half-transitive, | ||
# inner ná-) appear with outer prefix TAMA chunk allomorphs. In order to | ||
# get this right, we append the symbol "^O" after the vowel in the lexical | ||
# entry (e.g., í^H^O-, ná^O-), then turn that into a flag that requires outer | ||
# prefix allomorphs. | ||
define RequireOuterAllomorphs "^O" "@P.PREFIX.INNER@" -> "@P.PREFIX.OUTER@" .o. | ||
"^O" "@P.PREFIX.MIDDLE@" -> "@P.PREFIX.OUTER@" .o. | ||
"^O" -> "@P.PREFIX.OUTER@"; | ||
|
||
# We temporarily keep the boundary symbol for inner affixes ("."), middle | ||
# affixes ("_"), and outer affixes ("=") in place so that we can target | ||
# morphophonology in each position more easily (especially when aiming to drop | ||
# the "weak" /i/ vowels that appear in inner lexical prefixes). | ||
read regex [Stems PostverbalAffixes] .o. | ||
MarkPrefixes .o. | ||
InsInner .o. InsMiddle .o. InsOuter .o. | ||
LInnerPrefixAllomorphs .o. RequireOuterAllomorphs .o. | ||
"." -> "." InnerAffixes "@P.PREFIX.INNER@" , | ||
"_" -> "_" MiddleAffixes "@P.PREFIX.MIDDLE@" , | ||
"=" -> "=" OuterAffixes "@P.PREFIX.OUTER@" ; | ||
define WordForms; | ||
|
||
# Rewrite rule for resurrecting the prefix boundary markers | ||
|
||
define ShowBoundaries [ "@P.PREFIX.INNER@" -> "." , | ||
"@P.PREFIX.MIDDLE@" -> "_" , | ||
"@P.PREFIX.OUTER@" -> "=" | ||
]; | ||
|
||
define SimplifyBoundaries [ [ "=" "=" -> "=" , "_" "_" -> "_" , "." "." -> "." ] | ||
.o. "=" "_" "." -> "=" | ||
.o. "_" "." -> "_" | ||
.o. "=" "_" -> "=" | ||
.o. [ "=" | "_" | "." ] -> 0 || .#. _ | ||
]; | ||
|
||
# Concatenate ObliqueAffixes and the other inflectional FSTs. | ||
read regex WordForms .o. [..] -> ObliqueAffixes || .#. _; | ||
|
||
twosided flag-diacritics | ||
define Grammar; | ||
|
||
# Morphophonology | ||
|
||
source morphology/phonology.xfscript | ||
define MorphoPhonology; | ||
|
||
read lexc morphology/affixes/verb_tags.lexc | ||
define Tags; | ||
|
||
# To prevent morphophonology to be tripped up be intervening flags | ||
set flag-is-epsilon ON | ||
|
||
# regex Grammar Tags; | ||
|
||
# Tentative code for dealing with Morphophonology as read for a separate | ||
# file. Though we would want to figure a way to compose each rewrite rule | ||
# the the morphological component one-by-one for faster compilation. | ||
|
||
regex [Grammar Tags] .o. MorphoPhonology ; | ||
define VerbModel | ||
|
||
# regex [Grammar Tags] .o. deletePrefixI .o. hToneSpreading .o. hToneSpreadingCleanup .o. deleteBoundarySymbol .o. uBeforeA .o. aBeforeI .o. lowABeforeI .o. iBeforeA .o. iBeforeO .o. uBeforeO .o. lInitialStemsSbjPl2 .o. lInitialStemsSbjPl2Cleanup .o. slDissimilation .o. zhDevoicing .o. zDevoicing; | ||
|
||
# Make flags visible, so that they can be converted to explicit boundary markers | ||
set flag-is-epsilon OFF | ||
|
||
# Output boundary markers based on flags | ||
|
||
regex VerbModel .o. ShowBoundaries ; | ||
define VerbModelWithBound | ||
|
||
# Make flags invisible again, so that they will not intervene in removing excessive boundary markers | ||
set flag-is-epsilon ON | ||
|
||
regex VerbModelWithBound .o. SimplifyBoundaries ; | ||
|
||
twosided flag-diacritics |