-
Notifications
You must be signed in to change notification settings - Fork 0
/
word_cloud.py
53 lines (46 loc) · 1.35 KB
/
word_cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from wordcloud import WordCloud
from PIL import Image
import matplotlib.pyplot as plt
def create_wordcloud(text, filename):
    """Render a word cloud for ``text`` and save it under ``./word-clouds/``.

    The cloud is generated at 800x800 on a white background, drawn on an
    8x8 inch matplotlib figure without axes, and written at 300 dpi.

    Args:
        text: The text handed to ``WordCloud.generate``.
        filename: Name of the image file (e.g. ``"news.png"``) to write
            inside the ``./word-clouds/`` directory.
    """
    import os  # local import: only this function needs it

    out_dir = "./word-clouds/"
    # savefig does not create missing directories, so make sure the target
    # directory exists before writing into it.
    os.makedirs(out_dir, exist_ok=True)
    path = out_dir + filename

    wordcloud = WordCloud(width=800, height=800,
                          background_color='white',
                          stopwords=None,
                          min_font_size=10).generate(text)

    figure = plt.figure(figsize=(8, 8), facecolor=None)
    try:
        plt.imshow(wordcloud)
        plt.axis("off")
        plt.tight_layout(pad=0)
        plt.savefig(path, dpi=300, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # without this, one figure per call piles up when several clouds
        # are exported in a row.
        plt.close(figure)

    # Re-encode the saved file as PNG through Pillow (kept from the
    # original); the context manager releases the image's file handle.
    with Image.open(path) as image:
        image.save(path, "PNG")
# TODO: Adjust the input paths and display names below to match the text
# files you want to create word clouds for.
# Each entry maps "path" to the source text file and "name" to the label
# that identifies the topic.
NEWS = dict(path="./news/news.txt", name="news")
BIBLE = dict(
    path="./bible/preprocessed/preprocessed_text.txt",
    name="bible",
)
WIKI_TL = dict(path="./wiki_tl/preprocessed_wiki_tl.txt", name="wiki tagalog")
LITERATURE = dict(path="./historical/hist-preprocessed.txt", name="Literature")
SONGS = dict(path="./songs/preprocessed.txt", name="songs")

# Topics to process, in order. Swap in the shorter list to run a subset:
topics = [NEWS, BIBLE, WIKI_TL, LITERATURE, SONGS]
# topics = [NEWS, WIKI_TL]
# Export one word cloud image per configured topic.
for topic in topics:
    # Topics with an empty path are skipped.
    if not topic["path"]:
        continue

    # Decode explicitly instead of relying on the platform's default locale
    # encoding. NOTE(review): assumes the corpus files are UTF-8 — confirm.
    with open(topic["path"], "r", encoding="utf-8") as file:
        print(topic)
        text = file.read()

    # The input file is already closed here; rendering does not need it.
    print("Exporting word cloud for " + topic["name"] + "...")
    create_wordcloud(text, topic["name"] + ".png")