From 66d83060049ca7edb90f83f80baa54c22bef0fa0 Mon Sep 17 00:00:00 2001 From: Jack Rueter Date: Mon, 1 Jul 2024 16:52:16 +0300 Subject: [PATCH] continue work with guesser and correct occasional contlex --- src/fst/morphology/root.lexc | 253 +++++++++++++++---- src/fst/morphology/stems/adjectives.lexc | 2 +- src/fst/morphology/stems/adjectives.xml | 2 +- src/fst/morphology/stems/nouns.lexc | 6 +- src/fst/morphology/stems/nouns.xml | 6 +- src/fst/morphology/stems/nouns_newwords.lexc | 28 ++ src/fst/morphology/stems/propernouns.lexc | 2 +- src/fst/morphology/stems/propernouns.xml | 2 +- 8 files changed, 246 insertions(+), 55 deletions(-) diff --git a/src/fst/morphology/root.lexc b/src/fst/morphology/root.lexc index 853c2c76..91cb588d 100644 --- a/src/fst/morphology/root.lexc +++ b/src/fst/morphology/root.lexc @@ -571,7 +571,7 @@ Multichar_Symbols +N_42_maksaa +N_43_keera +N_44_joora -!+N_45_puuola single word ++N_45_puuola ! single word +N_46_aaita +N_47_uushka +N_48_mooka @@ -580,59 +580,222 @@ Multichar_Symbols +N_50_sushaa +N_51_kupshaa +N_52_tshuurtsha -+N_53_seemda ++N_53_seemdja +N_54_tiiera +N_55_krooipa +N_56_lilla -+N_57_kutaa ++N_57_kutjaa +N_58_neetsha +N_59_kiqv -+N_60_ -+N_61_ -+N_62_ -+N_63_ -+N_64_ -+N_65_ -+N_66_ -+N_67_ -+N_68_ -+N_69_ - -+N_70_ -+N_71_ -+N_72_ -+N_73_ -+N_74_ -+N_75_ -+N_76_ -+N_77_ -+N_78_ -+N_79_ - -+N_80_ -+N_81_ -+N_82_ -+N_83_ -+N_84_ -+N_85_ -+N_86_ -+N_87_ -+N_88_ -+N_89_ - -+N_90_ -+N_91_ -+N_92_ -+N_93_ -+N_94_ -+N_95_ -+N_96_ -+N_97_ -+N_98_ ++N_60_kuoqig ++N_61_kep ++N_62_krutjk ++N_63_kraemp ++N_64_piqnj ++N_65_kuotj ++N_66_vaktj ++N_67_renj ++N_68_oksh ++N_69_kash + ++N_70_dunjtsh ++N_71_torii ++N_72_vaqg ++N_73_koql ++N_74_suqg ++N_75_joqug ++N_76_niqm ++N_77_usk ++N_78_vashk ++N_79_tup + ++N_80_maks ++N_81_luqm ++N_82_mieqr ++N_83_mieqr ++N_84_muqr ++N_85_kuoqd ++N_86_suqzh ++N_87_keqzh ++N_88_veqzh ++N_89_kuqzh + ++N_90_ruzuu ++N_91_radio ++N_92_ang ++N_93_kouv ++N_94_kand ++N_95_port ++N_96_nurjm ++N_97_laint 
++N_98_loul +N_99_ul ++N_100_pan +N_101_täm ++N_102_ ++N_103_ ++N_104_ ++N_105_ ++N_106_ ++N_107_ ++N_108_ ++N_109_ + ++N_110_ ++N_111_ ++N_112_ ++N_113_ ++N_114_ ++N_115_ ++N_116_ ++N_117_ ++N_118_ ++N_119_ + ++N_120_ ++N_121_ ++N_122_ ++N_123_ ++N_124_ ++N_125_ ++N_126_ ++N_127_ ++N_128_ ++N_129_ + ++N_130_ ++N_131_ ++N_132_ ++N_133_ ++N_134_ ++N_135_ +N_136_tas ++N_137_ ++N_138_ ++N_139_ + ++N_140_ ++N_141_ ++N_142_ ++N_143_ ++N_144_ ++N_145_ ++N_146_ ++N_147_ ++N_148_ ++N_149_ + ++N_150_ ++N_151_ ++N_152_ ++N_153_ ++N_154_ ++N_155_ ++N_156_ ++N_157_ ++N_158_ ++N_159_ + ++N_160_ ++N_161_ ++N_162_ ++N_163_ ++N_164_ ++N_165_ ++N_166_ ++N_167_ ++N_168_ ++N_169_ + ++N_170_ ++N_171_ ++N_172_ ++N_173_ ++N_174_ ++N_175_ ++N_176_ ++N_177_ ++N_178_ ++N_179_ + ++N_180_ ++N_181_ ++N_182_ ++N_183_ ++N_184_ ++N_185_ ++N_186_ ++N_187_ ++N_188_ ++N_189_ + ++N_190_ ++N_191_ ++N_192_ ++N_193_ ++N_194_ ++N_195_ ++N_196_ ++N_197_ ++N_198_ ++N_199_ + ++N_200_ ++N_201_ ++N_202_ ++N_203_ ++N_204_ ++N_205_ ++N_206_ ++N_207_ ++N_208_ ++N_209_ + ++N_210_ ++N_211_ ++N_212_ ++N_213_ ++N_214_ ++N_215_ ++N_216_ ++N_217_ ++N_218_ ++N_219_ + ++N_220_ ++N_221_ ++N_222_ ++N_223_ ++N_224_ ++N_225_ ++N_226_ ++N_227_ ++N_228_ ++N_229_ + ++N_230_ ++N_231_ ++N_232_ ++N_233_ ++N_234_ ++N_235_ ++N_236_ ++N_237_ ++N_238_ ++N_239_ + ++N_240_ ++N_241_ ++N_242_ ++N_243_ ++N_244_ ++N_245_ ++N_246_ ++N_247_ ++N_248_ ++N_249_ + !! ## Flag diacritics !! 
We have manually optimised the structure of our lexicon using following diff --git a/src/fst/morphology/stems/adjectives.lexc b/src/fst/morphology/stems/adjectives.lexc index da2cf13a..3d61f7d3 100644 --- a/src/fst/morphology/stems/adjectives.lexc +++ b/src/fst/morphology/stems/adjectives.lexc @@ -597,7 +597,7 @@ loptāmõt+A:loptāmõt A_-ZERO "loputon" ; lotšõn+A:lotšõn A_APPÕN "lurpallaan" ; lougli+A:lougli A_TŪĻI "hidas" ; loddõr+A:loʼddõr A_BÄʼZMÕR "laiskotteleva" ; -loḑ+A:loʼḑ A_PIʼŅ "lokainen" ; +loḑ+A:loʼḑḑ A_PIʼŅ "lokainen" ; loḑ+A:loʼḑ A_PIʼŅ "veltto" ; luini+A:luini A_TŪĻI "luinen" ; lumīzvālda+A:lumīz#vālda A_ĀITA "lumivalkoinen" ; diff --git a/src/fst/morphology/stems/adjectives.xml b/src/fst/morphology/stems/adjectives.xml index 95432b39..576a12b2 100644 --- a/src/fst/morphology/stems/adjectives.xml +++ b/src/fst/morphology/stems/adjectives.xml @@ -15693,7 +15693,7 @@ loḑ - loʼḑ + loʼḑḑ loʼḑ 2 diff --git a/src/fst/morphology/stems/nouns.lexc b/src/fst/morphology/stems/nouns.lexc index 2312ac11..a9b3212a 100644 --- a/src/fst/morphology/stems/nouns.lexc +++ b/src/fst/morphology/stems/nouns.lexc @@ -3514,7 +3514,7 @@ lejavõtāmõd+N:leja#võtām N_SIDĀM-PL "pääsiäinen" ; lejā+N:leʼj N_TUBĀ "liha" ; lejākudūd+N:lejā#kuʼd N_NIʼM-PL "lihaskudos" ; lejākīntš+N:lejā#kīn N_KĪNTŠ "hevosen kavio" ; -lek+N:lelk N_KEP "vuoto" ; +lek+N:lekk N_KEP "vuoto" ; lektsij+N:lektsij N_KǬJ "luento" ; lektār+N:lektār N_TIDĀR "suppilo" ; lem+N:lem N_KIM "lämpö" ; @@ -4152,7 +4152,7 @@ minerāl+N:minerāl N_ANALĪZ "kivennäinen" ; minerālain+N:minerāl#ain N_KAND "kivennäisaine" ; minerālappõn+N:minerāl#appõn N_APPÕN "mineraalihappo" ; minerāleļļõ+N:minerāl#eļļõ N_SIELDÕ "mineraaliöljy" ; -minerālmōļ+N:minerāl#mǭļ N_NIʼM "mineraalimaali" ; +minerālmōļ+N:minerāl#mǭļ N_GĒRMAŅ "mineraalimaali" ; minerālpūdõr+N:minerāl#pūdõr N_KĪNDÕR "mineraalipuuteri" ; minerālsūol+N:minerāl#sūol N_IRM "mineraalisuola" ; minerālsūolõ+N:minerāl#sūolõ N_SIELDÕ "mineraalisuola" ; @@ -7819,7 +7819,7 @@ 
varūks+N:varūks N_KATŪKS "varpe" ; varūon+N:varūon N_SIDĀM "farao" ; vasti+N:vasti N_KAȚKI "vastustaja" ; vastit+N:vastit N_ĀIGAST "vastakkainasettelu" ; -vastõkil+N:vastõ#kiʼl N_VAʼG "vastakaiku" ; +vastõkil+N:vastõ#kiʼl N_KÕʼL "vastakaiku" ; vastõkuostāji+N:vastõ#kuostāji N_SĒJI "vastuuhenkilö" ; vastõm+N:vastõm N_KǬRAND "vastakohta" ; vastõmit+N:vastõmit N_AMĀT "vastakohtaisuus" ; diff --git a/src/fst/morphology/stems/nouns.xml b/src/fst/morphology/stems/nouns.xml index 757a692d..6c12f6c3 100644 --- a/src/fst/morphology/stems/nouns.xml +++ b/src/fst/morphology/stems/nouns.xml @@ -80975,7 +80975,7 @@ lek - lelk + lekk lek @@ -95894,7 +95894,7 @@ minerālmōļ - minerāl#mǭļ + minerāl#mǭļ @@ -182057,7 +182057,7 @@ vastõkil - vastõ#kiʼl + vastõ#kiʼl vastõ|kiʼl diff --git a/src/fst/morphology/stems/nouns_newwords.lexc b/src/fst/morphology/stems/nouns_newwords.lexc index 88ef39c8..824406ec 100644 --- a/src/fst/morphology/stems/nouns_newwords.lexc +++ b/src/fst/morphology/stems/nouns_newwords.lexc @@ -47,12 +47,40 @@ test:test N_ "" ; < [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|e|i|o|õ|u|i e] [k l|k s|k t|p l|p ļ|p s n|p s t|p š|s k|s t|t k|t s|t š k|ț k|ț ļ] ā "+N":0 "+N_42_maksaa":0 > N_MAKSĀ ; < [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [ē|ī|ū o|ī e|ǟ|ū] [l 0:"l"|ļ 0:"ļ"|m 0:"m"|n 0:"n"|ņ 0:"ņ"|r 0:"r"|ŗ 0:"ŗ"] a "+N":0 "+N_43_keera":0 > N_KĒRA ; < [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [ǭ|ī] [l 0:"l"|ļ 0:"ļ"|m 0:"m"|n 0:"n"|ņ 0:"ņ"|r 0:"r"|ŗ 0:"ŗ"] a "+N":0 "+N_44_joora":0 > N_JǬRA ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [m|p|v] [ū o] [l 0:"l"| ļ 0:"ļ"|n 0:"n"| ņ 0:"ņ" | r 0:"r"|ŗ 0:"ŗ"] a "+N":0 "+N_45_puuola":0 > N_PŪOLA ; < [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* 
[ā|ǟ|ī|ȱ|ȭ|ū|ō|ǭ|ū o] [i b|i g|i g m|i k|i m|i p|i r|i t|l b|l d|l m|l p|l t s|l z|ļ m|m p|n d|n g|n k|n d l|n t|ņ ḑ|ņ k|ņ ț|r b|r k|r d|r l|r n|r s|ŗ g|ŗ k|r p|r f|r z|i s k|r s k|s k|d] a "+N":0 "+N_46_aaita":0 > N_ĀITA ; < [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [ā|ū] 0:"i" š:"s" k a "+N":0 "+N_47_uushka":0 > N_ŪŠKA ; < [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [ō|ȭ] 0:"u" k a "+N":0 "+N_48_mooka":0 > N_MȬKA ; < [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|i|o] 0:"ʼ" d ž ā "+N":0 "+N_49_dadzhaa":0 > N_DADŽĀ ; < [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [u] š 0:"š" ā "+N":0 "+N_50_sushaa":0 > N_SUŠĀ ; < [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [u] p š ā "+N":0 "+N_51_kupshaa":0 > N_KUPŠĀ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [ī|ū] r t š a "+N":0 "+N_52_tshuurtsha":0 > N_TŠŪRTŠA ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [ē] m ḑ a "+N":0 "+N_53_seemdja":0 > N_SĒMḐA ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [ē] [l 0:"l"| ļ 0:"ļ"|n 0:"n"| ņ 0:"ņ" | r 0:"r"|ŗ 0:"ŗ"] a "+N":0 "+N_54_tiiera":0 > N_TĪERA ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [f|k|m|p|r|v] a "+N":0 "+N_55_krooipa":0 > N_KRǬIPA ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [[a|i|u] [l l | m m|n n]|[ō] [z]] a "+N":0 "+N_56_lilla":0 > N_LILLA ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|ü|e|i|o|ȯ|ǫ|õ] 
[b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž]+ ā "+N":0 "+N_57_kutjaa":0 > N_KUȚĀ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [ā|ǟ|ǖ|ē|ī|ō|ȱ|ǭ|ȭ] [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž]+ a "+N":0 "+N_58_neetsha":0 > N_NĒTŠA ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|e|i|õ] 0:"ʼ" [b 0:"b"| g 0:"g"|v 0:"v"] "+N":0 "+N_59_kiqv":0 > N_KIʼV ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* u o 0:"ʼ" i g "+N":0 "+N_60_kuoqig":0 > N_KUOʼIG ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|e|i|o|u] [k 0:"k"| p 0:"p"|t 0:"t"] "+N":0 "+N_61_kep":0 > N_KEP ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|e|i|u] [š k|š t|t s|ț k] "+N":0 "+N_62_krutjk":0 > N_KRUȚK ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* ä:"ǟ" m p "+N":0 "+N_63_kraemp":0 > N_KRÄMP ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|e|i|o|õ|u] 0:"ʼ" [ḑ 0:"ḑ"| ļ 0:"ļ"| ņ 0:"ņ" |ŗ 0:"ŗ"] "+N":0 "+N_64_piqnj":0 > N_PIʼŅ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|e|o|u o] ț 0:"ț" "+N":0 "+N_65_kuotj":0 > N_KUOȚ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|e|i|o|õ|u] [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž]+ ț "+N":0 "+N_66_vaktj":0 > N_VAKȚ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* e:"ē" [ḑ 0:"ḑ"| ļ 0:"ļ"| ņ 0:"ņ" |ŗ 0:"ŗ"] "+N":0 "+N_67_renj":0 > N_REŅ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* 
[a|ä|e|i|o|õ|u] [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž]+ š "+N":0 "+N_68_oksh":0 > N_OKŠ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|e|i|o|õ|u] š 0:"š" "+N":0 "+N_69_kash":0 > N_KAŠ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [i:"ī"|o:"ō"|u:"ū"] [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž]+ š "+N":0 "+N_70_dunjtsh":0 > N_DUŅTŠ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž]+ [ī|i] "+N":0 "+N_71_torii":0 > N_TORĪ ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|e|i|o|õ|u|i e|ȯ] 0:"ʼ" [b|g|j|l|m] "+N":0 "+N_72_vaqg":0 > N_VAʼG ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|e|i|o|õ|u|i e|ȯ] 0:"ʼ" [b|g|j|l|m] "+N":0 "+N_73_koql":0 > N_VAʼL ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|o|u] 0:"ʼ" [b|g|r] "+N":0 "+N_74_suqg":0 > N_SUʼG ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* o 0:"ʼ" u g "+N":0 "+N_75_joqug":0 > N_JOʼUG ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|e|i|o|ȯ|õ|u] 0:"ʼ" [b|d|g|l|ļ|m|r|z] "+N":0 "+N_76_niqm":0 > N_NIʼM ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|e|i|o|ȯ|õ|u] [k t|k ț|s k|š k|t k] "+N":0 "+N_77_usk":0 > N_USK ; +< [b|c|č|d|ḑ|f|g|ģ|h|j|k|ķ|l|ļ|m|n|ņ|p|q|r|ŗ|s|š|t|ț|v|w|x|z|ž|a|ā|ä|ǟ|ü|ǖ|e|ē|i|ī|o|ō|ȯ|ȱ|ǫ|ǭ|ȭ|õ]* [a|ä|e|i|o|ȯ|õ|u] s:"š" k "+N":0 "+N_78_vashk":0 > N_VASHK ; diff --git a/src/fst/morphology/stems/propernouns.lexc b/src/fst/morphology/stems/propernouns.lexc index 18c2e49b..2c5accaa 
100644 --- a/src/fst/morphology/stems/propernouns.lexc +++ b/src/fst/morphology/stems/propernouns.lexc @@ -170,7 +170,7 @@ Brēmen+N+Prop:Brēmen PROP_PĒGAL "Bremen" ; Pētõr+N+Prop:Pētõr PROP_KĪNDÕR "Pētõr" ; Kōrli+N+Prop:Kōrli PROP_SĒMI "Kōrli" ; Babā+N+Prop:Baʼbbā PROP_PADĀ "Babā" ; -Pritš+N+Prop:Pritš PROP_DUŅTŠ "Pritš" ; +Pritš+N+Prop:Prītš PROP_DUŅTŠ "Pritš" ; Alfrēd+N+Prop:Alfrēd PROP_AMĀT "Alfrēd" ; Didžā+N+Prop:Diʼdžā PROP_DADŽĀ "Didžā" ; Didrõk+N+Prop:Didrõk PROP_TŪOITÕG "Didrõk" ; diff --git a/src/fst/morphology/stems/propernouns.xml b/src/fst/morphology/stems/propernouns.xml index 4c861d66..4e87b026 100644 --- a/src/fst/morphology/stems/propernouns.xml +++ b/src/fst/morphology/stems/propernouns.xml @@ -3758,7 +3758,7 @@ PētõrPētõrYYPētõrPētõrPēteris KōrliKōrliYYKōrliKōrliKārlis BabāBaʼbbāYYBabāBabāBabā -PritšPritšYYPritšPritšFricis +PritšPrītšYYPritšPritšFricis AlfrēdAlfrēdYYAlfrēdAlfrēdAlfrēds DidžāDiʼdžāYYDidžāDidžāDidzis DidrõkDidrõkYYDidrõkDidrõkDidriķis