initial

eemberda · Nov 22, 2022 · b00b9ca · b00b9ca
commit b00b9ca
Show file tree

Hide file tree

Showing 5 changed files with 129 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,32 @@
+# Cebuano-Syllable-Decoder
+Cebuano Syllable-Decoder
+
+## Installation
+* `pip install cebsyldec` or
+* inside the folder run `python setup.py install`
+
+## Requirements
+* `python>=2.7`
+
+## Functions
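+* `get_syllables(word)`
+   - Accepts a Cebuano word and returns the syllables of the word
+   - Output: list of syllable strings, e.g. `['ka', 'o', 'non']`
+* `get_syllable_sequence(word)`
+   - Returns the consonant/vowel pattern of the word with `-` between syllables
+      ```
+        VC-CVC etc.
+      ```
+* `get_CV_sequence(word)`
+   - Returns the unsegmented consonant/vowel pattern of the word, e.g. `CVVCVC`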
+   
+## How to Use
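+```python
+from cebsyldec import syllabledecoder
+
+syllabledecoder.get_syllables('kaonon')
+# Output: ['ka', 'o', 'non']      (ka + o + non)
+
+syllabledecoder.get_syllable_sequence('kaonon')
+# Output: 'CV-V-CVC'              (CV + V + CVC)
+```
+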
+## References
+
+* https://www.youtube.com/watch?v=ZULS0evRLHg
+
diff --git a/cebsyldec/syllabledecoder.py b/cebsyldec/syllabledecoder.py
@@ -0,0 +1,79 @@
def get_CV_sequence(word):
    """Return the consonant/vowel skeleton of a Cebuano word.

    Every vowel contributes one ``V`` and every consonant one ``C``.  The
    digraph "ng" is a single consonant and therefore contributes a single
    ``C``.  Characters outside the alphabet handled here (hyphens, digits,
    spaces, letters such as "c" or "f") contribute nothing.

    Args:
        word: The word to analyse; matching is case-insensitive.

    Returns:
        A string made only of the characters ``C`` and ``V``; empty when
        the word contains no recognised letters.

    Examples:
        >>> get_CV_sequence("kaonon")
        'CVVCVC'
        >>> get_CV_sequence("ngano")
        'CVCV'
    """
    vowels = frozenset("aeiou")
    # "ng" is a consonant too, but being two letters long it is recognised
    # by the dedicated branch in the loop rather than by membership here.
    consonants = frozenset("ptkbdgmnshlrwy")
    consonant_clusters = frozenset([
        "pw", "py", "pr", "pl", "tw", "ty", "tr", "ts",
        "kw", "ky", "kr", "kl", "bw", "by", "br", "bl",
        "dw", "dy", "dr", "gw", "gr", "mw", "my",
        "nw", "ny", "sw", "sy", "hw",
    ])

    # A consonant is held back until the next letter shows whether it opens
    # a "CV" pair, forms a cluster, becomes the digraph "ng", or stands alone.
    pending = None
    pieces = []

    for char in word.lower():
        if char in vowels:
            pieces.append("CV" if pending else "V")
            pending = None
        elif char not in consonants:
            # Not part of the alphabet: no symbol, and the pending
            # consonant (if any) is left untouched.
            continue
        elif pending == "n" and char == "g":
            # Merge into the digraph; it is still one pending consonant, so
            # the following letter is processed normally instead of skipped.
            pending = "ng"
        elif pending and pending + char in consonant_clusters:
            pieces.append("CC")
            pending = None
        else:
            if pending:
                pieces.append("C")
            pending = char

    if pending:
        pieces.append("C")

    return "".join(pieces)
+
def get_syllable_sequence(word):
    """Return the C/V skeleton of a word with ``-`` between syllables.

    The skeleton produced by ``get_CV_sequence`` is segmented with three
    boundary rules, each applied wherever it matches:

    * ``VCC`` -> ``VC-C``: of two consonants after a vowel, the first closes
      the syllable and the second opens the next one.
    * ``VCV`` -> ``V-CV``: a single consonant between vowels opens the next
      syllable.  This also covers vowel-initial words ("ako" -> ``V-CV``)
      and vowel sequences followed by a consonant ("kaayo" -> ``CV-V-CV``).
    * ``VV`` -> ``V-V``: adjacent vowels belong to different syllables.

    Args:
        word: The word to analyse; matching is case-insensitive.

    Returns:
        The hyphen-separated skeleton, e.g. ``'CV-V-CVC'`` for "kaonon".
    """
    syl_seq = get_CV_sequence(word.lower())

    boundary_rules = (
        ("VCC", "VC-C"),
        ("VCV", "V-CV"),
        ("VV", "V-V"),
    )
    for pattern, split in boundary_rules:
        # str.replace only handles non-overlapping matches ("VCVCV" shares a
        # vowel between two matches), so repeat until nothing is left.
        while pattern in syl_seq:
            syl_seq = syl_seq.replace(pattern, split)

    return syl_seq
+
+
def get_syllables(word):
    """Split a Cebuano word into its syllables.

    The word is first reduced to the phoneme units that the C/V skeleton is
    built from: one unit per recognised letter, with "ng" counted as a single
    unit.  Each syllable pattern returned by ``get_syllable_sequence`` then
    consumes as many units as it has symbols.  Slicing units rather than raw
    characters keeps the syllables aligned when the word contains several
    "ng" digraphs or characters the skeleton ignores (e.g. the hyphen in
    "gabi-i").

    Args:
        word: The word to split; it is lower-cased first.

    Returns:
        A list of lower-case syllable strings, e.g. ``['ka', 'o', 'non']``
        for "kaonon".  Characters outside the recognised alphabet are left
        out of the result.  An empty list is returned when the word has no
        recognised letters.
    """
    word = word.lower()
    syl_seq = get_syllable_sequence(word)

    alphabet = frozenset("aeiouptkbdgmnshlrwy")
    units = []
    for char in word:
        if char not in alphabet:
            continue
        if char == "g" and units and units[-1] == "n":
            # "n" directly followed by "g" is the single consonant "ng".
            units[-1] = "ng"
        else:
            units.append(char)

    syllables = []
    start = 0
    for pattern in syl_seq.split("-"):
        if not pattern:
            # "".split("-") yields [""]; there is no syllable to emit.
            continue
        end = start + len(pattern)
        syllables.append("".join(units[start:end]))
        start = end

    return syllables
+
+
+if __name__ == "__main__":  # no command-line interface: running this file directly does nothing
+    pass
diff --git a/requirements.txt b/requirements.txt
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,2 @@
+[metadata]
+description-file = README.md
diff --git a/setup.py b/setup.py
@@ -0,0 +1,16 @@
+from distutils.core import setup
+
+
setup(
    # The distribution name matches the README's ``pip install cebsyldec``.
    name='cebsyldec',
    # Must name the directory that actually holds the code
    # (cebsyldec/syllabledecoder.py); a non-existent package directory makes
    # the build fail.
    packages=['cebsyldec'],
    version='1.0',
    description="A Cebuano Syllabicator",
    author='Eric John Emberda',
    author_email='eric.emberda@gmail.com',
    # NOTE(review): both URLs still point at the Cebuano-Syllabicator
    # repository -- confirm the correct location for this project.
    url='https://github.com/eemberda/Cebuano-Syllabicator',
    download_url='https://github.com/eemberda/Cebuano-Syllabicator/archive/1.0.tar.gz',
    keywords=['syllabicator', 'cebuano-syllabicator'],
    classifiers=[],
    data_files=[],
)