-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit b00b9ca
Showing
5 changed files
with
129 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Cebuano-Syllable-Decoder | ||
Cebuano Syllable-Decoder | ||
|
||
## Installation | ||
* `pip install cebsyllabicator` or
* inside the folder run `python setup.py install` | ||
|
||
## Requirements | ||
* `python>=2.7` | ||
|
||
## Functions | ||
* syllabicate(word='') | ||
- Accepts a Cebuano word and returns the syllables of the word | ||
- Default Output: List of syllables | ||
``` | ||
VC+CVC etc. | ||
``` | ||
## How to Use | ||
```
from cebsyllabicator import syllabicator

syllabicator.syllabicate('kaonon')

Output:
ka + o + non
CV + V + CVC
```
|
||
## References | ||
|
||
* https://www.youtube.com/watch?v=ZULS0evRLHg | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
def get_CV_sequence(word):
    """Return the consonant/vowel pattern of a Cebuano word.

    Every vowel contributes ``V`` and every consonant ``C``.  The digraph
    "ng" is a single consonant and therefore contributes a single ``C``.
    Characters outside the recognised letters (hyphens, digits, letters such
    as "c" or "f") are ignored.

    Args:
        word: The word to analyse; letter case is ignored.

    Returns:
        A string made of ``C`` and ``V``, e.g. ``"CVVCVC"`` for "kaonon".
    """
    vowels = frozenset("aeiou")
    consonants = frozenset("ptkbdgmnshlrwy")
    consonant_clusters = frozenset([
        "pw", "py", "pr", "pl", "tw", "ty", "tr", "ts", "kw", "ky", "kr",
        "kl", "bw", "by", "br", "bl", "dw", "dy", "dr", "gw", "gr", "mw",
        "my", "nw", "ny", "sw", "sy", "hw",
    ])

    pieces = []
    # A consonant that has been read but not yet written to the pattern.  It
    # is held back because the next letter decides how it is written: as the
    # onset of "CV", as half of a "CC" cluster, or as a lone "C".
    pending = None

    for char in word.lower():
        if char in vowels:
            pieces.append("CV" if pending else "V")
            pending = None
        elif char not in consonants:
            # Not part of the recognised alphabet: contributes nothing.
            continue
        elif pending == "n" and char == "g":
            # "n" + "g" is the single consonant "ng".  Keep it pending as one
            # unit so the following letter is processed normally instead of
            # being dropped.
            pending = "ng"
        elif pending and pending + char in consonant_clusters:
            pieces.append("CC")
            pending = None
        else:
            if pending:
                pieces.append("C")
            pending = char

    if pending:
        # A word-final consonant still has to be written out.
        pieces.append("C")

    return "".join(pieces)
|
||
def get_syllable_sequence(word):
    """Return the CV pattern of a word with "-" marking syllable breaks.

    The pattern produced by ``get_CV_sequence`` is split by applying a fixed
    list of rewrite rules, each repeated until it no longer matches.

    Args:
        word: The word to analyse; letter case is ignored.

    Returns:
        The CV pattern with hyphens between syllables, e.g. ``"CV-V-CVC"``
        for "kaonon".
    """
    syl_seq = get_CV_sequence(word.lower())

    # Order matters: the longer, cluster-aware patterns must run before the
    # shorter ones that would otherwise match inside them.
    split_rules = (
        ("CCVCCV", "CCVC-CV"),
        ("CCVCV", "CCV-CV"),
        ("VCC", "VC-C"),
        ("CVCV", "CV-CV"),
        ("VV", "V-V"),
        ("VCVC", "V-CVC"),
        # A consonant between two vowels opens the next syllable even when no
        # consonant follows (e.g. "ako" -> "V-CV").  Without this rule such a
        # run stays unsplit with two vowels in one syllable.
        ("VCV", "V-CV"),
    )
    for pattern, replacement in split_rules:
        # One replace() pass can expose a new overlapping match, so repeat
        # until the pattern is gone.
        while pattern in syl_seq:
            syl_seq = syl_seq.replace(pattern, replacement)

    return syl_seq
|
||
|
||
def get_syllables(word):
    """Split a Cebuano word into its syllables.

    The hyphenated CV pattern from ``get_syllable_sequence`` is mapped back
    onto the letters of the word, one pattern symbol at a time.

    Args:
        word: The word to split; it is lower-cased first.

    Returns:
        A list of syllable strings in word order.

    Note:
        The mapping assumes every character of ``word`` is represented in the
        pattern.  Characters that ``get_CV_sequence`` ignores are not
        compensated for here and will shift the syllable boundaries.
    """
    word = word.lower()
    patterns = get_syllable_sequence(word).split("-")

    syllables = []
    start = 0
    for pattern in patterns:
        end = start
        for _ in pattern:
            # "ng" is one symbol in the pattern but two letters in the word.
            # Checking at every symbol (not once per syllable) keeps
            # syllables with two digraphs, such as "ngang", intact.
            end += 2 if word.startswith("ng", end) else 1
        syllables.append(word[start:end])
        start = end

    return syllables
|
||
|
||
# Running this module as a script currently does nothing; the functions above
# are meant to be imported.
if __name__ == "__main__":
    pass
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[metadata] | ||
description-file = README.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
"""Packaging script for the cebsyllabicator distribution."""

# distutils is deprecated (PEP 632) and is no longer shipped with Python 3.12+.
# Prefer setuptools, and fall back to distutils only on old interpreters where
# setuptools is not installed.
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup


setup(
    name='cebsyllabicator',
    packages=['cebsyllabicator'],
    version='1.0',
    description="A Cebuano Syllabicator",
    author='Eric John Emberda',
    author_email='eric.emberda@gmail.com',
    url='https://github.com/eemberda/Cebuano-Syllabicator',
    download_url='https://github.com/eemberda/Cebuano-Syllabicator/archive/1.0.tar.gz',
    keywords=['syllabicator', 'cebuano-syllabicator'],
    classifiers=[],
    data_files=[],
)