From fddd02c4c0f73d77a2fc3a0c30c95ce3cefabd6b Mon Sep 17 00:00:00 2001 From: Robin Malhotra Date: Sat, 24 Oct 2015 23:05:09 +0530 Subject: [PATCH 1/2] python3.4 compatibility --- __init__.py | 0 readability.py | 65 ++++++++++++++++++++++++------------------------- syllables_en.py | 1 - utils.py | 21 ++++++++-------- 4 files changed, 42 insertions(+), 45 deletions(-) create mode 100644 __init__.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/readability.py b/readability.py index 2a96610..55243f4 100755 --- a/readability.py +++ b/readability.py @@ -2,11 +2,11 @@ import math -from utils import get_char_count -from utils import get_words -from utils import get_sentences -from utils import count_syllables -from utils import count_complex_words +from .utils import get_char_count +from .utils import get_words +from .utils import get_sentences +from .utils import count_syllables +from .utils import count_complex_words class Readability: @@ -23,7 +23,7 @@ def analyze_text(self, text): syllable_count = count_syllables(words) complexwords_count = count_complex_words(text) avg_words_p_sentence = word_count/sentence_count - + self.analyzedVars = { 'words': words, 'char_cnt': float(char_count), @@ -35,44 +35,44 @@ def analyze_text(self, text): } def ARI(self): - score = 0.0 + score = 0.0 if self.analyzedVars['word_cnt'] > 0.0: score = 4.71 * (self.analyzedVars['char_cnt'] / self.analyzedVars['word_cnt']) + 0.5 * (self.analyzedVars['word_cnt'] / self.analyzedVars['sentence_cnt']) - 21.43 return score - + def FleschReadingEase(self): - score = 0.0 + score = 0.0 if self.analyzedVars['word_cnt'] > 0.0: score = 206.835 - (1.015 * (self.analyzedVars['avg_words_p_sentence'])) - (84.6 * (self.analyzedVars['syllable_cnt']/ self.analyzedVars['word_cnt'])) return round(score, 4) - + def FleschKincaidGradeLevel(self): - score = 0.0 + score = 0.0 if self.analyzedVars['word_cnt'] > 0.0: score = 0.39 * (self.analyzedVars['avg_words_p_sentence']) + 11.8 * (self.analyzedVars['syllable_cnt']/ self.analyzedVars['word_cnt']) - 15.59 return round(score, 4) - + def GunningFogIndex(self): - score = 0.0 + score = 0.0 if self.analyzedVars['word_cnt'] > 0.0: score = 0.4 * ((self.analyzedVars['avg_words_p_sentence']) + (100 * (self.analyzedVars['complex_word_cnt']/self.analyzedVars['word_cnt']))) return round(score, 4) def SMOGIndex(self): - score = 0.0 + score = 0.0 if self.analyzedVars['word_cnt'] > 0.0: score = (math.sqrt(self.analyzedVars['complex_word_cnt']*(30/self.analyzedVars['sentence_cnt'])) + 3) return score def ColemanLiauIndex(self): - score = 0.0 + score = 0.0 if self.analyzedVars['word_cnt'] > 0.0: score = (5.89*(self.analyzedVars['char_cnt']/self.analyzedVars['word_cnt']))-(30*(self.analyzedVars['sentence_cnt']/self.analyzedVars['word_cnt']))-15.8 return round(score, 4) def LIX(self): longwords = 0.0 - score = 0.0 + score = 0.0 if self.analyzedVars['word_cnt'] > 0.0: for word in self.analyzedVars['words']: if len(word) >= 7: @@ -82,27 +82,26 @@ def LIX(self): def RIX(self): longwords = 0.0 - score = 0.0 + score = 0.0 if self.analyzedVars['word_cnt'] > 0.0: for word in self.analyzedVars['words']: if len(word) >= 7: longwords += 1.0 score = longwords / self.analyzedVars['sentence_cnt'] return score - - -if __name__ == "__main__": - text = """We are close to wrapping up our 10 week Rails Course. This week we will cover a handful of topics commonly encountered in Rails projects. We then wrap up with part 2 of our Reddit on Rails exercise! By now you should be hard at work on your personal projects. The students in the course just presented in front of the class with some live demos and a brief intro to to the problems their app were solving. Maybe set aside some time this week to show someone your progress, block off 5 minutes and describe what goal you are working towards, the current state of the project (is it almost done, just getting started, needs UI, etc.), and then show them a quick demo of the app. Explain what type of feedback you are looking for (conceptual, design, usability, etc.) and see what they have to say. As we are wrapping up the course you need to be focused on learning as much as you can, but also making sure you have the tools to succeed after the class is over.""" - - rd = Readability(text) - print 'Test text:' - print '"%s"\n' % text - print 'ARI: ', rd.ARI() - print 'FleschReadingEase: ', rd.FleschReadingEase() - print 'FleschKincaidGradeLevel: ', rd.FleschKincaidGradeLevel() - print 'GunningFogIndex: ', rd.GunningFogIndex() - print 'SMOGIndex: ', rd.SMOGIndex() - print 'ColemanLiauIndex: ', rd.ColemanLiauIndex() - print 'LIX: ', rd.LIX() - print 'RIX: ', rd.RIX() +# +# if __name__ == "__main__": +# text = """We are close to wrapping up our 10 week Rails Course. This week we will cover a handful of topics commonly encountered in Rails projects. We then wrap up with part 2 of our Reddit on Rails exercise! By now you should be hard at work on your personal projects. The students in the course just presented in front of the class with some live demos and a brief intro to to the problems their app were solving. Maybe set aside some time this week to show someone your progress, block off 5 minutes and describe what goal you are working towards, the current state of the project (is it almost done, just getting started, needs UI, etc.), and then show them a quick demo of the app. Explain what type of feedback you are looking for (conceptual, design, usability, etc.) and see what they have to say. As we are wrapping up the course you need to be focused on learning as much as you can, but also making sure you have the tools to succeed after the class is over.""" +# +# rd = Readability(text) +# print 'Test text:' +# print '"%s"\n' % text +# print 'ARI: ', rd.ARI() +# print 'FleschReadingEase: ', rd.FleschReadingEase() +# print 'FleschKincaidGradeLevel: ', rd.FleschKincaidGradeLevel() +# print 'GunningFogIndex: ', rd.GunningFogIndex() +# print 'SMOGIndex: ', rd.SMOGIndex() +# print 'ColemanLiauIndex: ', rd.ColemanLiauIndex() +# print 'LIX: ', rd.LIX() +# print 'RIX: ', rd.RIX() diff --git a/syllables_en.py b/syllables_en.py index ea02f22..e07bdca 100644 --- a/syllables_en.py +++ b/syllables_en.py @@ -138,4 +138,3 @@ def count(word): fallback_cache[word] = count return count - diff --git a/utils.py b/utils.py index e319caf..26e8b2b 100644 --- a/utils.py +++ b/utils.py @@ -6,17 +6,17 @@ import nltk from nltk.tokenize import RegexpTokenizer -import syllables_en - +from .syllables_en import count TOKENIZER = RegexpTokenizer('(?u)\W+|\$[\d\.]+|\S+') SPECIAL_CHARS = ['.', ',', '!', '?'] def get_char_count(words): characters = 0 for word in words: - characters += len(word.decode("utf-8")) + print(word) + characters += len(word) return characters - + def get_words(text=''): words = [] words = TOKENIZER.tokenize(text) @@ -38,7 +38,7 @@ def get_sentences(text=''): def count_syllables(words): syllableCount = 0 for word in words: - syllableCount += syllables_en.count(word) + syllableCount += count(word) return syllableCount #This method must be enhanced. At the moment it only @@ -50,11 +50,11 @@ def count_complex_words(text=''): complex_words = 0 found = False cur_word = [] - - for word in words: + + for word in words: cur_word.append(word) if count_syllables(cur_word)>= 3: - + #Checking proper nouns. If a word starts with a capital letter #and is NOT at the beginning of a sentence we don't add it #as a complex word. @@ -65,10 +65,9 @@ def count_complex_words(text=''): if str(sentence).startswith(word): found = True break - if found: + if found: complex_words += 1 found = False - + cur_word.remove(word) return complex_words - From 1238a8268aa32ab9497f46d0fe05ab6457d9770e Mon Sep 17 00:00:00 2001 From: Robin Malhotra Date: Sat, 24 Oct 2015 23:08:55 +0530 Subject: [PATCH 2/2] python3.4 compatibility + edited README --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fcd269f..88e5959 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Readability ==================== A collection of functions that measure the readability of a given body of text. I'd -recommend checking out the wikipedia articles below--most of the metrics estimate +recommend checking out the wikipedia articles below--most of the metrics estimate the grade level required to comprehend a given block of text and may return odd results on small snippets of text. @@ -43,3 +43,12 @@ Largely lifted from: https://github.com/nltk/nltk_contrib/tree/master/nltk_contrib/readability SMOG index appears to perform most accurately. + +#Usage + +``` +from readability.readability import Readability + + +#caveats +The cloned library should be in a folder titled 'readability' at the root directory