-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordC.py
81 lines (67 loc) · 2.5 KB
/
wordC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
import wordcloud as wc
from wordcloud import STOPWORDS
import os
import matplotlib.pyplot as plt
import csv
# Accumulators for the post titles and post bodies read from the CSV files.
titles = []
body = []

# Additional words to omit from the clouds, on top of wordcloud's built-in STOPWORDS.
stop_words = ["NA", "NA NA", "https"] + list(STOPWORDS)

# Folder that holds the input CSV files; it is also made the working directory.
directory = os.fsdecode('/Users/grantfinn/Library/Mobile Documents/com~apple~CloudDocs/UROP Research/Dump')
os.chdir(directory)

# Folder the generated PNG images are written to.
save_path = '/Users/grantfinn/Library/Mobile Documents/com~apple~CloudDocs/UROP Research/Clouds'
# exist_ok=True tolerates an already-existing folder without hiding other
# failures (e.g. permission errors); makedirs also creates missing parents.
os.makedirs(save_path, exist_ok=True)
def concatenate_list_data(list, sep=''):
    """Return the string forms of the items in *list* joined into one string.

    Args:
        list: Iterable of items; each one is converted with ``str()``.
            The name shadows the builtin but is kept so existing callers
            (including keyword callers) continue to work.
        sep: Text inserted between consecutive items. Defaults to ``''``,
            which gives plain back-to-back concatenation.

    Returns:
        The joined string, or ``''`` when *list* is empty.
    """
    # str.join assembles the result in a single pass instead of re-copying
    # the growing string on every `+=`.
    return sep.join(map(str, list))
# For every CSV file in the input folder, build one word cloud from the
# second-to-last column (post titles) and one from the last column (post
# bodies), and save both as PNG images in save_path.
for file in os.listdir(directory):
    print(file)
    filename = file
    if not filename.endswith(".csv"):
        continue

    # Open by explicit path so reading does not depend on the working directory.
    reader = pd.read_csv(os.path.join(directory, filename), encoding='utf-8', low_memory=False)

    # Rebuild both lists for each file; otherwise every cloud would also
    # contain the text of all previously processed files.
    # Missing cells are dropped so they do not show up as the word "nan".
    titles = []
    if reader.shape[1] >= 2:
        # Only files with at least two columns have a title column.
        titles = reader.iloc[:, -2].dropna().astype(str).tolist()
    body = reader.iloc[:, -1].dropna().astype(str).tolist()

    # NOTE(review): drops the last 9 characters of the file name; presumably a
    # fixed suffix including ".csv" -- confirm against the real file names.
    save_token = filename[:-9]

    for texts, suffix in ((titles, '_Title.png'), (body, '_Body.png')):
        # Join with a space so the last word of one post is not fused with
        # the first word of the next one.
        text = ' '.join(texts)
        try:
            cloud = wc.WordCloud(stopwords=stop_words, width=800, height=400).generate(text)
        except ValueError:
            # WordCloud raises ValueError when no words remain to plot;
            # skip this image rather than aborting the whole run.
            print('No words to plot for ' + save_token + suffix + '; skipped')
            continue
        cloud.to_file(os.path.join(save_path, save_token + suffix))