-
Notifications
You must be signed in to change notification settings - Fork 2
/
syl.py
executable file
·92 lines (79 loc) · 2.91 KB
/
syl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from nltk.corpus import cmudict
# Pronunciation table built from NLTK's CMU Pronouncing Dictionary corpus.
# nsyl() indexes it by lowercased word and iterates over the
# pronunciations stored under that entry.
cmu_syl = cmudict.dict()
# Hand-maintained syllable counts, keyed by lowercase word. nsyl() consults
# this table before the CMU dictionary and the rule-based counter.
exception = {
    "am": 1,
    "giant": 2,
    "latecomer": 3,
    "latecomers": 3,
    "penelope": 4,
    "table": 2,
    "onomatopoeia": 6,
}

# Known words that compound() accepts as components when it tries to split
# a longer word into parts.
allwords = {"late", "comer", "come", "coming", "cat", "ice", "cream"}
def stripPunc(sent):
    """Return the string ``sent`` with common punctuation characters removed.

    Only the characters listed below are deleted; everything else,
    including apostrophes, hyphens and whitespace, is kept as is.
    """
    unwanted = (".", ";", ":", "!", "?", "/", "\\", ",", "#", "@", "$", "&", ")", "(", "\"")
    # Delete every unwanted character in a single pass over the string.
    deletion_table = str.maketrans("", "", "".join(unwanted))
    return sent.translate(deletion_table)
#todo: "The chicken explodes" is counted as 6 syllables, fix it
def syllables(word):
    """
    Estimate the number of syllables in a word using spelling rules.

    The word is lowercased, leading/trailing ".:;?!" characters are
    stripped and hyphens are removed. If compound() can split the result
    into known words, the compound's count is returned. Otherwise the
    count is the number of vowel groups (runs of 'aeiouy'), adjusted as
    follows: a final 'e' subtracts one, a final 'le' or 'cre' adds one,
    and a non-empty word never scores below one.

    Returns 0 when nothing is left after normalisation (for example "-"
    or "..."); this case used to raise IndexError.
    """
    word = word.lower().strip(".:;?!").replace("-", "")
    if not word:
        # Nothing to count, and word[0] below would fail on an empty string.
        return 0

    compoundsyllables = compound(word)
    if compoundsyllables:
        return compoundsyllables

    vowels = 'aeiouy'
    # A vowel group starts at a vowel that is the first letter or that
    # follows a non-vowel.
    count = 1 if word[0] in vowels else 0
    for previous, current in zip(word, word[1:]):
        if current in vowels and previous not in vowels:
            count += 1

    if word.endswith('e'):
        # A trailing 'e' is treated as silent ...
        count -= 1
    if word.endswith('le'):
        # ... unless it belongs to a final 'le' (e.g. "table") ...
        count += 1
    if word.endswith('cre'):
        # ... or a final 'cre' (e.g. "acre").
        count += 1
    if count == 0:
        # Every non-empty word has at least one syllable.
        count += 1
    return count
def compound(word):
    """
    Return the syllable count of ``word`` if it is a compound of known
    words, otherwise False.

    Split points are tried from left to right. A split is accepted when
    the left part is in ``allwords`` and the right part is either in
    ``allwords`` too or is itself a compound. The first accepted split
    decides the result.
    """
    for cut in range(len(word)):
        head = word[:cut]
        if head not in allwords:
            continue
        tail = word[cut:]
        if tail in allwords:
            return syllables(head) + syllables(tail)
        # The right-hand part may itself be a compound of known words.
        tail_count = compound(tail)
        if tail_count:
            return syllables(head) + tail_count
    return False
def num_of_syllables(line):
    """Return the total syllable count of all words in ``line``.

    Punctuation is removed with stripPunc(), the line is split on
    whitespace, and the per-word counts from nsyl() are added up. A line
    with no words yields 0.
    """
    tokens = stripPunc(line).split()
    return sum(nsyl(token) for token in tokens)
def nsyl(word):
    """
    Return the number of syllables in ``word``.

    Lookup order:
      1. the hand-maintained ``exception`` table;
      2. the CMU dictionary (``cmu_syl``), counting the phonemes that end
         in a digit in each listed pronunciation;
      3. the rule-based syllables() counter, which is not yet completely
         correct, when the dictionary has no entry for the word.

    When the CMU dictionary provides more than one reading, the maximum
    count is used, since the other values are based on 'fast reading'
    pronunciations of the same word.

    Both tables are keyed by lowercase words, so the word is lowercased
    once and that form is used for both lookups. Checking ``exception``
    with the raw word let capitalised input such as "Am" bypass the
    override.
    """
    key = word.lower()
    if key in exception:
        return exception[key]

    try:
        pronunciations = cmu_syl[key]
    except KeyError:
        # Not in the dictionary: fall back to the spelling-based estimate.
        return syllables(word)

    # One syllable per phoneme whose last character is a digit; take the
    # longest reading.
    return max(
        sum(1 for phoneme in pronunciation if phoneme[-1].isdigit())
        for pronunciation in pronunciations
    )