-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathanalysis.R
69 lines (59 loc) · 2.15 KB
/
analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#install.packages("ggplot2")
#install.packages("tm")
#install.packages("wordcloud")
#install.packages("syuzhet")
#install.packages("SnowballC")
#install.packages("openNLP")
#install.packages("NLP")
#install.packages("RColorBrewer")
#install.packages("tidyverse")
#install.packages("tibble")
library(tidyverse)
library(tibble)
library(ggplot2)
library(tm)
library(wordcloud)
library(syuzhet)
library(NLP)
library(openNLP)
library(RColorBrewer)
# Read the chat export line by line and wrap the lines in a one-column tibble.
texts <- tibble(readLines("location/chat.txt"))
docs <- VCorpus(VectorSource(texts))

# Transformer that deletes every match of `pattern` from a document's text.
trans <- content_transformer(function(x, pattern) gsub(pattern, "", x))

# Clean-up ----
# Delete the characters "/", "@" and "|" (the pipe is regex-escaped), one
# pattern after the other.
docs <- Reduce(
  function(corpus, pattern) tm_map(corpus, trans, pattern),
  c("/", "@", "\\|"),
  docs
)

# Remaining normalisation steps, applied in sequence: lower-case, drop digits,
# drop English stop words, drop punctuation, collapse whitespace, stem.
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeNumbers) %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(stemDocument)
# Converting to TermDocument Matrix ----
# Build the term-document matrix from the cleaned corpus and convert it to a
# plain matrix so that rowSums() can be applied.
dtm <- TermDocumentMatrix(docs)
mat <- as.matrix(dtm)

# Total count of each term, most frequent first. `TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
v <- sort(rowSums(mat), decreasing = TRUE)

# Converting to a data frame: one row per term with its total frequency.
d <- data.frame(word = names(v), freq = v)
# Seed the RNG before drawing the word cloud (presumably so that its layout
# is reproducible between runs).
set.seed(1056)

# Constructing wordcloud ----
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
wordcloud(
  words = d$word,
  freq = d$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Frequently used words ----
# head() returns at most ten rows, so a corpus with fewer than ten distinct
# terms does not produce NA bars (indexing with d[1:10, ] pads with NA rows).
top_words <- head(d, 10)
barplot(
  top_words$freq,
  las = 2,
  names.arg = top_words$word,
  col = "lightblue",
  main = "Most frequent words",
  ylab = "Word frequencies"
)
# Analysing Sentiment in text ----
# Score each chat line separately. `texts` is a one-column tibble, so the
# character vector of lines is extracted with [[1]]; calling as.character()
# on the tibble itself would collapse the whole column into a single string
# and yield only one row of scores.
sentiment <- get_nrc_sentiment(texts[[1]], language = "english")

# Chat lines side by side with their per-line sentiment scores.
text <- cbind(texts, sentiment)

# Sum every sentiment column over all lines and reshape the totals into a
# two-column data frame (sentiment name, total count) for plotting.
sentiment_totals <- colSums(sentiment)
TotalSentiment <- data.frame(
  sentiment = names(sentiment_totals),
  count = unname(sentiment_totals)
)
print(TotalSentiment)
# Bar Chart ----
# One bar per sentiment, with bar height taken directly from `count`.
# Bars are filled by sentiment and the legend is hidden.
ggplot(
  data = TotalSentiment,
  aes(x = sentiment, y = count, fill = sentiment)
) +
  geom_col() +
  labs(x = "sentiment", y = "TotalCount", title = "Total Sentiment Score") +
  theme(legend.position = "none")