-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsvo-senti.py
101 lines (80 loc) · 2.8 KB
/
svo-senti.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import sys
import csv
import svo
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize
sentences = []
def recursive_file_gen(mydir):
for root, dirs, files in os.walk(mydir):
for file in files:
#print (file[-3:])
if file[-3:] == "txt":
print (file)
yield os.path.join(root, file)
def splitSentences():
paragraph = sys.argv[1]
lines_list = tokenize.sent_tokenize(paragraph)
sentences.extend(lines_list)
def splitSentences2():
path = '/home/triya/Desktop/cnn/cnn_cleaned/'
l = list(recursive_file_gen(path))
for filename in l:
with open (filename) as f:
paragraph = f.read()
lines_list = tokenize.sent_tokenize(paragraph)
sentences.extend(lines_list)
def writeCSV(f, svo_list, np, ss):
with open('data.csv', 'a') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
spamwriter.writerow([f, svo_list, np, ss['compound'], ss['neg'], ss['neu'], ss['pos']])
def getSVOs(sentence):
svo.parser = svo.English()
#parse = parser(u'Trump defends son in Paris.')
f = sentence.decode('utf-8')
#print type(f)
parse = svo.parser(f)
#print f
print "SVO: ",
#print(svo.findSVOs(parse))
svo_list = svo.findSVOs(parse)
if not svo_list:
svo_list = svo.findSVs(parse)
print(svo_list)
#for word in parse:
# print(word.text, word.pos_, word.dep_)
print "Noun phrases: ",
npset = ''
for np in parse.noun_chunks:
print(np.text),
npset = npset + ',' + np.text
print
return(f, svo_list, npset)
def getSentiments(sentence):
#sentences = ["Trump defends son in Paris.",
#"Two close confidants of Pope Francis have written an article in a Jesuit journal that strongly criticizes some American religious supporters of President Donald Trump for their fundamentalist views.",
#"The President's son has shown astonishingly poor judgment."
#]
#paragraph = "Trump's dealings with Russia are a never-ending stream of controversy. \
# As President, Trump has the authority to declassify any material he wishes to, but the move was shocking. Critics called it \"reckless\" and \"dangerous\". \
# Trump has gone out on a thin limb, risking charges of obstruction of justice to protect Mike Flynn, who lost his White House job after lying about contacts with Russians."
#lines_list = tokenize.sent_tokenize(paragraph)
#sentences.extend(lines_list)
sid = SentimentIntensityAnalyzer()
ss = sid.polarity_scores(sentence)
print "Polarity scores: ",
for k in sorted(ss):
print('{0}: {1} '.format(k, ss[k])),
print
return ss
def main():
#splitSentences()
splitSentences2()
for sentence in sentences:
print(sentence)
f, svo_list, np = getSVOs(sentence)
ss = getSentiments(sentence)
writeCSV(f, svo_list, np, ss)
print
if __name__ == "__main__":
main()