Skip to content

Commit

Permalink
Merge pull request #49 from sayantikabanik/fix_summarization
Browse files Browse the repository at this point in the history
LSA applied to pros and cons
  • Loading branch information
sayantikabanik authored Aug 10, 2022
2 parents 2c9c05d + f5965a2 commit 8c2a674
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
6 changes: 4 additions & 2 deletions analysis_framework/dashboard/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import holoviews as hv
import hvplot.pandas
from analysis_framework.utils.text_summarization import summarize_text as stext
#from analysis_framework.utils.text_summarization import summarize_text_lexrank as stext_lexrank
from analysis_framework.utils.text_summarization import summarize_text_lsa as stext_lsa
from analysis_framework.utils import burnout_index_calculation
from analysis_framework.utils import static as rs
hv.extension('bokeh')
Expand Down Expand Up @@ -119,7 +121,7 @@ def pros_widget(location_widget_b, cluster_widget_b, comp_widget_b):
pros = pn.Column(
'# Pros',
pn.layout.Divider(),
stext(display_output),
stext_lsa(display_output),
background='#e4ede6', height=500, width=500
)
return pros
Expand All @@ -137,7 +139,7 @@ def cons_widget(location_widget_b, cluster_widget_b, comp_widget_b):
cons = pn.Column(
'# Cons',
pn.layout.Divider(),
stext(display_output),
stext_lsa(display_output),
background='#fac8d3', height=500, width=500
)
return cons
2 changes: 1 addition & 1 deletion analysis_framework/utils/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@

# Text summarization
# ---------------------------------
sentence_count=2
sentence_count=3
bonus_words_list = ['work life balance', 'job security', 'work environment',
'career growth', 'salary', 'culture', 'career' , 'pay',
'improvement', 'manager', 'employees', 'company',
Expand Down
15 changes: 14 additions & 1 deletion analysis_framework/utils/text_summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
from sumy.summarizers.edmundson import EdmundsonSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.lsa import LsaSummarizer
from analysis_framework.utils import static as rs

# Uncomment below imports if not pre-installed
Expand All @@ -22,5 +24,16 @@ def summarize_text(text, language='english'):
for sentence in summary:
count += 1
sentence_list.append(str(f'{count}. {sentence}'))
output = "\n".join(sentence_list)
output = "\n\n".join(sentence_list)
return output

def summarize_text_lsa(text, language='english'):
    """Summarize *text* with sumy's LSA summarizer as a numbered list.

    Args:
        text: Plain text to summarize.
        language: Language name passed to the sumy tokenizer, stemmer and
            stop-word list. Defaults to ``'english'``.

    Returns:
        A string containing the selected sentences, each prefixed with its
        1-based position (``"1. ..."``) and separated by a blank line. The
        number of sentences requested is ``rs.sentence_count``.
    """
    summarizer = LsaSummarizer(Stemmer(language))
    # Follow sumy's documented usage: give the summarizer a stop-word list
    # so that common function words are excluded from its term dictionary.
    summarizer.stop_words = get_stop_words(language)

    document = PlaintextParser(text, Tokenizer(language)).document
    summary = summarizer(document, rs.sentence_count)

    # Number the sentences from 1 and separate them with a blank line.
    return "\n\n".join(
        f'{position}. {sentence}'
        for position, sentence in enumerate(summary, start=1)
    )

0 comments on commit 8c2a674

Please sign in to comment.