import string
from collections import Counter

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

# Fetch the NLTK resources this script relies on: the 'punkt_tab' tokenizer
# data and the 'stopwords' corpus.
nltk.download('punkt_tab')
nltk.download('stopwords')
7+
8+
def summarize_text_nltk(text, num_sentences=3):
    """
    Summarize the input text using a frequency-based method with NLTK.

    Each sentence is scored by summing the document-wide frequencies of the
    non-stop-word, alphanumeric tokens it contains. The ``num_sentences``
    highest-scoring sentences are returned in their original order. Ties are
    broken in favour of the sentence that appears earlier in the text.

    Args:
        text (str): The text to be summarized.
        num_sentences (int): The desired number of sentences in the summary.

    Returns:
        str: The summarized text. An empty string is returned when ``text``
        is empty or whitespace-only, or when ``num_sentences`` is not
        positive. When the text has no more than ``num_sentences`` sentences,
        the whitespace-normalized text is returned unchanged.
    """
    # A non-positive sentence budget means "no summary"; without this guard a
    # negative value would be used as a slice bound and select almost everything.
    if not text or num_sentences <= 0:
        return ""

    # Collapse every run of whitespace (including newlines) into one space.
    text = " ".join(text.split())
    if not text:
        return ""

    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        return text

    stop_words = set(stopwords.words('english'))
    word_freq = Counter(
        word
        for word in word_tokenize(text.lower())
        if word.isalnum() and word not in stop_words
    )

    # Scores are only used for ranking, so raw counts are used directly:
    # dividing by the maximum frequency would not change the ordering, and
    # skipping it avoids calling max() on an empty collection when no token
    # survives the filter above.
    scores = [
        sum(word_freq.get(word, 0) for word in word_tokenize(sentence.lower()))
        for sentence in sentences
    ]

    # Rank sentence *positions* rather than sentence text so that repeated
    # sentences are scored and selected independently. sorted() is stable,
    # so equal scores keep document order.
    ranked = sorted(
        range(len(sentences)), key=lambda index: scores[index], reverse=True
    )
    chosen = sorted(ranked[:num_sentences])

    return " ".join(sentences[index] for index in chosen)
54+
# Sample passage used to demonstrate the summarizer.
input_text = """
Hacktoberfest is a month-long celebration of open source software, where developers from around
the world contribute to various projects. It encourages participation in the open source community
and helps improve software quality through collaborative efforts. Participants can earn rewards by
making contributions, such as pull requests, to eligible repositories on GitHub.
"""

# Reduce the sample passage to a single sentence.
summary = summarize_text_nltk(input_text, num_sentences=1)

# Print the original passage, then its summary, each terminated by a newline.
print(input_text, summary, sep="\n")
0 commit comments