Skip to content

Commit

Permalink
Põhja, Loode etc can prefix a geoname
Browse files Browse the repository at this point in the history
  • Loading branch information
merisiga committed Jun 14, 2024
1 parent ba18a4d commit 21615e3
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 13 deletions.
14 changes: 1 addition & 13 deletions src/fst/morphology/root.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,7 @@ LEXICON SingleWords
Conjunctions ; !!= * `@CODE@`
NoninflectingVerbs ; !!= * `@CODE@`
NoninflectingAdjectiveVa ; !!= * `@CODE@`
ProperNounsGeoPrefixed ; !!= * `@CODE@` Kagu-Läti, Kesk-Argentiina etc
NonCompoundingAdverbs ; !!= * `@CODE@`
AdverbsLast ; !!= * `@CODE@` an adverb that may be either a simplex word, or the second part of a compound

Expand Down Expand Up @@ -1157,10 +1158,6 @@ LEXICON Latter !!= * `@CODE@` the latter part of a compound
@R.POS.AComp@@R.Case.Gen@ LatterAdjective_ne ; ! enamaruuduline, enimaruuduline

@R.POS.Adv@ LatterAdjective_v ; ! harvanähtavaid etc
! @R.POS.Adv@@R.Stem.topelt@ LatterAdjective ; ! topelthallikas etc
! @R.POS.Adv@@R.Stem.vähe@ LatterAdjective ; ! vähehallikas etc
! @R.POS.Adv@@R.Stem.üle@ LatterAdjective ; ! ülehallikas etc
! @R.POS.Adv@@D.Stem.topelt@@D.Stem.vähe@@D.Stem.üle@ LatterAdjective_v ; ! harvanähtavaid etc

! x + noun
@R.POS.ACRMinus@ LatterNoun ;
Expand All @@ -1185,11 +1182,6 @@ LEXICON Latter !!= * `@CODE@` the latter part of a compound

@R.Der.us@@R.Case.Nom@@D.Stem.Nom@ LatterNoun ; ! funktsionaalsusriiul

! @R.POS.Adv@@R.Stem.topelt@ LatterNoun ; ! topeltriiul etc
! @R.POS.Adv@@R.Stem.vähe@@P.NeedAdj.On@@D.NeedDerNe@ LatterNoun ; ! vähe + kalor +Der/line; not väheautone
! @R.POS.Adv@@R.Stem.üle@ LatterNoun ; ! ületoodang etc
! @R.POS.Adv@@R.Stem.üle@ LatterProperNounGeo ; ! ülesiberiline

! x + deverbal noun (in addition to x + noun)

@R.POS.N@@R.Case.Sem@ LatterDeverbal ; ! mägedesttulek
Expand All @@ -1212,10 +1204,6 @@ LEXICON Latter !!= * `@CODE@` the latter part of a compound
@R.POS.ACR@@R.Case.Nom@@P.POS.Num@@P.Part.Three@ CardinalNumbersInCompFin ; ! E10

! x + adverb
! @R.POS.Pref@@P.POS.Adv@@P.NeedPart.Three@ CompoundingAdverbs ; ! era+ratsa+baas, era+järel+valvama+Der/ja
! @R.POS.N@@R.Case.Nom@@R.Stem.Nom@@P.POS.Adv@@C.Case@@P.NeedPart.Three@ CompoundingAdverbs ; ! ehitus+järel+valvama+Der/ja
! @R.POS.N@@R.Case.Gen@@P.POS.Adv@@C.Case@@P.NeedPart.Three@ CompoundingAdverbs ; ! ehitus+järel+valvama+Der/ja

@R.POS.Pref@@P.POS.Adv@@P.NeedPart.Three@ AdverbsFirst ; ! era+ratsa+baas, era+järel+valvama+Der/ja
@R.POS.Pref@@P.POS.Adv@@P.NeedPart.Three@ AdverbsFirstOrLast ; ! era+hästi+valvama+Der/ja
@R.POS.N@@R.Case.Nom@@R.Stem.Nom@@P.POS.Adv@@C.Case@@P.NeedPart.Three@ AdverbsFirst ; ! ehitus+järel+valvama+Der/ja
Expand Down
17 changes: 17 additions & 0 deletions src/fst/morphology/stems/propernouns.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@ LEXICON ProperNouns
ProperNounsOther ;


! Hyphenated modifiers that may precede a geographical proper noun,
! e.g. Kagu-Läti, Kesk-Argentiina.
! Each entry writes the prefix with its hyphen on the upper side, the same
! string followed by # on the lower side, and then continues into
! ProperNounsGeo for the name itself.
! NOTE(review): the # is presumably the compound-boundary marker used
! elsewhere in this grammar - confirm.
LEXICON ProperNounsGeoPrefixed
Alam-:Alam-# ProperNounsGeo ;
Edela-:Edela-# ProperNounsGeo ;
Ida-:Ida-# ProperNounsGeo ;
Indo-:Indo-# ProperNounsGeo ;
Kagu-:Kagu-# ProperNounsGeo ;
Kesk-:Kesk-# ProperNounsGeo ;
Kirde-:Kirde-# ProperNounsGeo ;
Ladina-:Ladina-# ProperNounsGeo ;
Loode-:Loode-# ProperNounsGeo ;
Lääne-:Lääne-# ProperNounsGeo ;
Lõuna-:Lõuna-# ProperNounsGeo ;
Põhja-:Põhja-# ProperNounsGeo ;
Suur-:Suur-# ProperNounsGeo ;
Väike-:Väike-# ProperNounsGeo ;
Ülem-:Ülem-# ProperNounsGeo ;

LEXICON ProperNounsGeo

Aachen+N+Prop:˘Aachen REDEL "weight: 11 " ;
Expand Down
21 changes: 21 additions & 0 deletions src/import/fsgt2final.sh
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,27 @@ LC_COLLATE=C join -a 1 -a 2 -t "_" -e "###" -o 1.2 2.1 2.2 names.tmp1 proper.tmp
> proper.tmp2

# Build propernouns.protolexc: the ProperNouns dispatcher lexicon, the
# ProperNounsGeoPrefixed lexicon, and the ProperNounsGeo lexicon filled from
# the G_-tagged lines of proper.tmp2.
#
# printf is used instead of echo because echo's handling of '\n' differs
# between shells: bash's builtin prints the two characters literally unless
# -e or xpg_echo is in effect, which would leave the lexicon headers glued
# onto a single line.

# Top-level lexicon, followed by one blank separator line.
printf '%s\n' \
  'LEXICON ProperNouns' \
  ' ProperNounsGeo ;' \
  ' ProperNounsPersons ;' \
  ' ProperNounsOther ;' \
  '' \
  > propernouns.protolexc

printf '%s\n' '' 'LEXICON ProperNounsGeoPrefixed' >> propernouns.protolexc

# The prefix list is loend8, which originates from Vabamorf. Each prefix
# becomes one entry of the form " X-:X-# ProperNounsGeo ; ".
for geo_prefix in \
  Alam Edela Ida Indo Kagu Kesk Kirde Ladina \
  Loode Lääne Lõuna Põhja Suur Väike Ülem
do
  printf ' %s-:%s-# ProperNounsGeo ; \n' "$geo_prefix" "$geo_prefix"
done >> propernouns.protolexc

printf '%s\n' '' 'LEXICON ProperNounsGeo' '' >> propernouns.protolexc

# Keep only the lines tagged G_ and drop the tag.
sed -n 's/^G_//p' proper.tmp2 >> propernouns.protolexc

Expand Down

0 comments on commit 21615e3

Please sign in to comment.