-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetrics.py
75 lines (58 loc) · 1.99 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import spacy
from textstat.textstat import textstatistics, legacy_round
import textstat
def split_sentences(text):
    """Split *text* into sentences using the spaCy ``'en'`` pipeline.

    The pipeline is loaded on the first call and cached on the function
    object, so repeated calls do not reload the model each time.

    Args:
        text: The raw string to segment.

    Returns:
        The ``sents`` iterable of the parsed document. Each sentence can in
        turn be iterated to obtain its tokens.
    """
    # Named ``nlp`` (not ``set``) so the builtin ``set`` is not shadowed.
    nlp = getattr(split_sentences, "_nlp", None)
    if nlp is None:
        nlp = split_sentences._nlp = spacy.load('en')
    return nlp(text).sents
def num_sentences(text):
    """Return how many sentences spaCy finds in *text*.

    Delegates the parsing to ``split_sentences`` so the sentence
    segmentation logic lives in exactly one place.

    Args:
        text: The raw string to analyse.

    Returns:
        The sentence count as an ``int`` (``0`` when no sentence is found).
    """
    return sum(1 for _ in split_sentences(text))
def num_words(text):
    """Return the number of word tokens in *text*.

    Tokens that spaCy flags as punctuation or whitespace are skipped, so
    commas, full stops and line breaks do not inflate the word count that
    the readability formulas divide by.

    Args:
        text: The raw string to analyse.

    Returns:
        The word count as an ``int`` (``0`` for empty text).
    """
    return sum(
        1
        for sentence in split_sentences(text)
        for token in sentence
        if not (token.is_punct or token.is_space)
    )
def avg_sent_length(text):
    """Return the average number of words per sentence in *text*.

    Args:
        text: The raw string to analyse.

    Returns:
        ``num_words(text) / num_sentences(text)`` as a ``float``, or ``0.0``
        when the text contains no sentences (instead of raising
        ``ZeroDivisionError``).
    """
    sentence_total = num_sentences(text)
    if not sentence_total:
        return 0.0
    # Convert before dividing so the result is never truncated by integer
    # division.
    return float(num_words(text)) / sentence_total
def syllables_count(word):
    """Return the syllable count that textstat reports for *word*.

    Args:
        word: The item to measure; it is converted with ``str`` before being
            handed to textstat, so any object whose string form is the word
            text is accepted.

    Returns:
        Whatever ``textstatistics().syllable_count`` returns for that string.
    """
    counter = textstatistics()
    word_text = str(word)
    return counter.syllable_count(word_text)
def total_syllables(text):
    """Return the sum of per-token syllable counts across *text*.

    Every token of every sentence produced by ``split_sentences`` is passed
    to ``syllables_count`` and the results are added together.

    Args:
        text: The raw string to analyse.

    Returns:
        The total syllable count (``0`` when there are no tokens).
    """
    return sum(
        syllables_count(token)
        for sentence in split_sentences(text)
        for token in sentence
    )
def avg_syllables_per_word(text):
    """Return the average number of syllables per word in *text*.

    The syllable total is obtained by passing the whole text to
    ``syllables_count``; the word total comes from ``num_words``.

    Args:
        text: The raw string to analyse.

    Returns:
        The unrounded ratio as a ``float``, or ``0.0`` when the text has no
        words (instead of raising ``ZeroDivisionError``).
    """
    word_total = num_words(text)
    if not word_total:
        return 0.0
    # NOTE(review): the previous ``(x * 100) / 100`` step was a no-op and
    # looked like an unfinished attempt at rounding; the intended precision
    # is not visible here, so the raw ratio is returned.
    return float(syllables_count(text)) / float(word_total)
def hard_words(text):
    """Return the number of distinct "hard" words in *text*.

    A token counts as hard when it has at least two syllables (according to
    ``syllables_count``) and is not in textstat's easy-word collection.

    Args:
        text: The raw string to analyse.

    Returns:
        The count of distinct hard words as an ``int``.
    """
    # Fetch the easy-word collection once; it does not depend on the token.
    easy_words = textstat.textstat._textstatistics__get_lang_easy_words()

    # Collect whole token strings. (``list += str`` would extend the list
    # with the individual characters of each word instead.)
    candidates = {
        str(token)
        for sentence in split_sentences(text)
        for token in sentence
    }

    # NOTE(review): the membership test is case-sensitive; confirm whether
    # the easy-word list is lowercase and tokens should be lowercased first.
    hard = {
        candidate
        for candidate in candidates
        if candidate not in easy_words and syllables_count(candidate) >= 2
    }
    return len(hard)
def fre(text):
    """Return the Flesch Reading Ease score of *text*.

    Computed as ``206.835 - 1.015 * (words / sentences)
    - 84.6 * (syllables / words)``.

    Each count is computed once, because every helper call re-parses the
    text. An average whose denominator is zero is treated as ``0.0``, so
    empty text yields ``206.835`` instead of raising ``ZeroDivisionError``.

    Args:
        text: The raw string to score.

    Returns:
        The score as a ``float``.
    """
    word_total = num_words(text)
    sentence_total = num_sentences(text)

    words_per_sentence = (
        float(word_total) / sentence_total if sentence_total else 0.0
    )
    syllables_per_word = (
        float(total_syllables(text)) / word_total if word_total else 0.0
    )
    return 206.835 - (1.015 * words_per_sentence) - (84.6 * syllables_per_word)
def gf(text):
    """Return the Gunning Fog index of *text*.

    Computed as ``0.4 * ((words / sentences) + 100 * (hard_words / words))``.

    Each count is computed once, because every helper call re-parses the
    text. A ratio whose denominator is zero is treated as ``0.0``, so empty
    text yields ``0.0`` instead of raising ``ZeroDivisionError``.

    Args:
        text: The raw string to score.

    Returns:
        The index as a ``float``.
    """
    word_total = num_words(text)
    sentence_total = num_sentences(text)

    words_per_sentence = (
        float(word_total) / sentence_total if sentence_total else 0.0
    )
    hard_word_percent = (
        100 * (float(hard_words(text)) / word_total) if word_total else 0.0
    )
    return 0.4 * (words_per_sentence + hard_word_percent)
# Text to score; replace the empty string with the passage to analyse.
text = ""

# Report both readability metrics, one per line, as "<label><value>".
print("Flesch Reading Ease Index: ", fre(text), sep="")
print("Gunning Fog Index: ", gf(text), sep="")