forked from wmay/R_workshop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
script.R
139 lines (103 loc) · 3.41 KB
/
script.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# <-- the number sign starts a comment; R ignores the rest of the line
# press control+enter to run the current line of a script
# math operators
1 + 1
2 * 3
3^2
(1 + 1)^3
# variables
v1 <- 3
v2 = 4 # same thing: `=` also assigns at the top level
v1 + v2
# vectors
1:100 # creates a vector of 100 integers, 1 through 100
# vectorized functions: the operation is applied to every element
(1:25)^2
1:25 + 2 # `:` binds tighter than `+`, so this is (1:25) + 2
mean(1:25)
sd(1:25)
# common statistical distributions
dnorm(0) # normal distribution (density)
curve(dnorm, -3, 3) # plot the function from -3 to 3
dnorm(-3:3) # it's vectorized!
pnorm(1.96) # normal distribution (cumulative)
# same as the table in the back of your stats textbook
curve(pnorm, -3, 3)
rnorm(1) # one random draw from a normal distribution
hist(rnorm(10000)) # histogram of 10,000 random draws
# The usual way to read data: a CSV file becomes a data frame
# ("some_file.csv" is a placeholder file name)
csv <- read.csv(file = "some_file.csv")

# CCES data ----
# If needed, download the data from
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/21447
# and make the folder that contains it the working directory
# (Session -> Set Working Directory)
load(file = "commoncontent2012.RData")
ls() # names of the objects now in the workspace
cces <- x # keep the loaded object `x` under a more descriptive name

# First look at the data
class(cces) # new type of object -- a data frame
dim(cces) # number of rows, then number of columns
class(cces$gender) # a factor
table(cces$gender) # counts per category
barplot(height = table(cces$gender))
# subsetting
# `==` is another vectorized function: it checks every inputstate in cces
# and gives back one TRUE/FALSE value per row (54,535 of them)
is_ny <- cces$inputstate == "New York"
sum(is_ny) # TRUE counts as 1, so this is the number of matching rows
which(is_ny) # positions of the matching rows

# Data frames are indexed as cces[row, column]; leaving the column slot
# empty keeps every column, so `ny` holds only the New York rows
ny <- cces[is_ny, ]

# Compare the distribution of CC334A in the full sample and in New York
barplot(table(cces$CC334A), main = "US Political Views")
barplot(table(ny$CC334A), main = "New York Political Views")
# install the basicspace package (only if it is not installed yet)
# install.packages() needs the package name as a character string; the bare
# symbol `basicspace` fails with "object 'basicspace' not found"
if (!requireNamespace("basicspace", quietly = TRUE)) {
  install.packages("basicspace")
}
# load the basicspace package
library(basicspace)
# get information about the aldmck [Aldrich-McKelvey scaling] function
?aldmck
# CCES columns used for the scaling:
# CC334A = self, CC334C = Obama, CC334D = Romney,
# CC334E = Democratic Party, CC334F = Republican Party,
# CC334G = Tea Party, CC334P = Supreme Court
# LETTERS[c(1, 3:7, 16)] is A, C, D, E, F, G, P
scales <- paste0("CC334", LETTERS[c(1, 3:7, 16)])
am_scales <- cces[, scales]

# 'apply' functions: 'for' loops without the loop
# Loop version: preallocate the matrix, then fill it one column at a time
m <- matrix(nrow = nrow(am_scales), ncol = ncol(am_scales))
for (n in seq_len(ncol(m))) { # seq_len() stays empty when there are 0 columns
  m[, n] <- as.numeric(am_scales[, n])
}
# ^^^ same values as above, in one call; vapply() also checks that every
# column converts to a numeric vector with one entry per row
m <- vapply(am_scales, as.numeric, numeric(nrow(am_scales)))
unique(as.vector(m)) # which codes occur?
# treat code 8 as missing
# (presumably the "not sure" answer -- confirm with levels(cces$CC334A))
m[m == 8] <- NA
unique(as.vector(m))
colnames(m) <- c("Self", "Obama", "Romney", "Democratic Party",
                 "Republican Party", "Tea Party", "Supreme Court")
# Aldrich-McKelvey scaling of the placement matrix `m`.
# respondent = 1: column 1 ("Self") holds the respondents' own placements.
# polarity = 2: column 2 ("Obama") is the stimulus used to orient the scale
# (see ?aldmck for the exact meaning of both arguments)
results <- aldmck(m, respondent = 1, polarity = 2)
plot(results)
plot.AM(results)
class(results)
str(results) # what is in this object?

# Density of the estimated ideal points, all respondents.
# `add` is not a plotting parameter for density objects (it only triggered
# "not a graphical parameter" warnings), so it is no longer passed.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
plot(density(results$respondents$idealpt,
             from = -2, to = 2, na.rm = TRUE))
# Same density, restricted to respondents whose inputstate is New York
plot(density(results$respondents$idealpt[cces$inputstate == "New York"],
             from = -2, to = 2, na.rm = TRUE),
     main = "Political Ideology (New York Only)")
# make a rainbow density graph
# Density of the estimated ideal points for the New York respondents
ny_idealpts <- results$respondents$idealpt[cces$inputstate == "New York"]
ny_ideals <- density(ny_idealpts, from = -2, to = 2, na.rm = TRUE)
plot(ny_ideals, ylab = "", xlab = "Liberal to Conservative",
     xlim = c(-2, 2), main = "Political Ideology (New York Only)")
str(ny_ideals) # the density object stores the curve in $x and $y
xs <- ny_ideals$x
ys <- ny_ideals$y
# colors from blue to red, one per grid point of the curve
cols <- rainbow(length(xs), start = 4 / 6, end = 0, v = .8, s = .5)
# Fill the area under the curve with one thin colored strip per pair of
# neighbouring grid points. There are length(xs) - 1 such pairs, so the loop
# stops one short of the end; running to length(xs) would read xs[n + 1]
# past the last element. The strip index doubles as the color index, so no
# separate counter is needed (the old counter `x` also overwrote the data
# object `x` created by load()).
for (n in seq_len(length(xs) - 1)) {
  polygon(c(xs[n], xs[n], xs[n + 1], xs[n + 1]),
          c(0, ys[n], ys[n + 1], 0),
          col = cols[n], border = NA)
}