-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1.Descriptive_Analysis.R
230 lines (220 loc) · 11.9 KB
/
1.Descriptive_Analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# Rows of the item dictionary tagged "index" describe the derived scales.
index <- filter(Itemdesc, item == "index")

# Long layout of the scale rows: pivot_longer() puts the study-cycle column
# name in `name` and the scale's variable name in `value`.
index1 <- pivot_longer(
  dplyr::select(index, item, CIVED_1999, ICCS_2009, ICCS_2016),
  -item
)

# Attach a construct label (taken from index$D3) to every scale variable.
# Variable names outside the three lists get NA; "MINORMLE" is left out on
# purpose (original note: it is not in CIVED1999).
index1 <- mutate(
  index1,
  C = ifelse(
    value %in% c("IMMIGMLE", "IMMRGHT", "E_IMMRGHT"),
    index$D3[1],
    ifelse(
      value %in% c("WOMRTMLE", "GENEQL", "S_GENEQL"),
      index$D3[2],
      ifelse(value %in% c("ETHRGHT", "S_ETHRGHT"), index$D3[3], NA)
    )
  )
)

# Long layout of the item rows (everything that is not a scale row).
items <- pivot_longer(
  dplyr::select(
    filter(Itemdesc, item != "index"),
    item, CIVED_1999, ICCS_2009, ICCS_2016
  ),
  -item
)

# The first letter of the item id picks the construct label:
# "I" -> index$D3[1], "G" -> index$D3[2], anything else -> index$D3[3].
items <- mutate(
  items,
  Construct = ifelse(
    substr(item, 1, 1) == "I",
    paste0(index$D3[1]),
    ifelse(
      substr(item, 1, 1) == "G",
      paste0(index$D3[2]),
      paste0(index$D3[3])
    )
  )
)
# ---- CIVED 1999 ----
# Writes markdown headings with cat() and prints, for every construct found in
# the CIVED 1999 item list, one weighted bar chart per item followed by a
# violin/box plot of the derived index taken from the same position.
ha <- items %>% filter(name == "CIVED_1999")
constructs <- unique(ha$Construct)

# Index variable names of this cycle. Loop-invariant, so computed once.
# Not passed through na.omit(): all rows of this cycle are kept.
ind_name1 <- index1 %>% filter(name == "CIVED_1999")

cat(" \n")
cat(" \n")
cat("## CIVED1999 \n")
cat(" \n")
cat(" \n")

# seq_along() keeps the loop from running when there are no constructs
# (1:length(x) would iterate over c(1, 0)).
for (j in seq_along(constructs)) {
  cat(" \n")
  cat(" \n")
  cat(paste0("### Construct: ", constructs[j], " \n"))
  cat(" \n")
  cat(" \n")

  # Item variable names of this construct; rows with any NA are dropped.
  hl <- ha %>% filter(Construct == constructs[j]) %>% na.omit()
  constr_name <- hl$value

  cat(" \n")
  cat(" \n")
  cat(paste("**Items:", paste(constr_name, collapse = ", "), "** \n"))
  cat(" \n")
  cat(" \n")

  for (i in seq_along(constr_name)) {
    item_var <- constr_name[i]

    data <- ISC_desc %>%
      dplyr::select(COUNTRY, SENWGT_Gc1, all_of(item_var), GENDER) %>%
      mutate_at(c(item_var, "GENDER"), as_factor) %>%
      mutate(
        GENDER = factor(
          GENDER,
          levels = c("Male", "Female"),
          labels = c("Boy", "Girl")
        )
      ) %>%
      na.omit()

    # Copy of the data with GENDER overwritten, used to add a "Total" facet.
    data_total <- transform(data, GENDER = "Total")

    # Read the variable label directly instead of eval(parse(text = ...)).
    item_label <- attr(data[[item_var]], "label")

    g1 <- data %>%
      ggplot(
        aes(
          x = .data[[item_var]],
          y = ..prop..,
          group = COUNTRY,
          fill = COUNTRY
        )
      ) +
      geom_bar(aes(weight = SENWGT_Gc1), alpha = 0.5) +
      geom_bar(aes(weight = SENWGT_Gc1), data = data_total, alpha = 0.5) +
      facet_grid(COUNTRY ~ GENDER, switch = "y") +
      scale_fill_discrete(guide = "none") +
      ggtitle(str_wrap(tolower(item_label), 60)) +
      ylab("Weighted proportion of responses") +
      xlab(paste0(item_var, "- CIVED_1999")) +
      geom_text(
        aes(label = scales::percent(..prop.., accuracy = 0.1), y = ..prop..),
        stat = "count", vjust = -.5, size = 2
      ) +
      geom_text(
        data = data_total,
        aes(label = scales::percent(..prop.., accuracy = 0.1), y = ..prop..),
        stat = "count", vjust = -.5, size = 2
      ) +
      scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
      scale_x_discrete(labels = function(x) str_wrap(x, 10)) +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
    print(g1)
  }

  # NOTE(review): the index variable is matched to the construct purely by
  # position j; the `C` column of index1 is not consulted. Confirm that both
  # orderings agree.
  ind_name <- ind_name1$value[j]

  # "MINORMLE" is skipped for this cycle. The is.na() guard also skips a
  # missing index name, which would otherwise abort the if() with
  # "missing value where TRUE/FALSE needed".
  if (!is.na(ind_name) && ind_name != "MINORMLE") {
    cat(" \n")
    cat(" \n")
    cat(paste("**Index:", paste(unique(index$D1)[j], collapse = ", "), "** \n"))
    cat(" \n")
    cat(" \n")

    data <- ISC_desc %>%
      dplyr::select(COUNTRY, SENWGT_Gc1, all_of(ind_name), GENDER) %>%
      mutate_at(c("GENDER", "COUNTRY"), as_factor) %>%
      mutate(
        GENDER = factor(
          GENDER,
          levels = c("Male", "Female"),
          labels = c("Boy", "Girl")
        )
      ) %>%
      na.omit()
    data_total <- transform(data, GENDER = "Total")

    # Weighted mean over all rows, and weighted mean per gender.
    me <- weighted.mean(data[[ind_name]], data$SENWGT_Gc1)
    mg <- data %>%
      dplyr::select(GENDER, SENWGT_Gc1, all_of(ind_name)) %>%
      group_by(GENDER) %>%
      summarise_at(ind_name, list(~ weighted.mean(., SENWGT_Gc1))) %>%
      mutate(GENDER = as.factor(GENDER)) %>%
      data.frame()

    # One-row frame for the "Total" facet so the overall-mean line is drawn
    # once rather than once per observation.
    total_mean <- data.frame(
      GENDER = "Total",
      overall_mean = me,
      stringsAsFactors = FALSE
    )

    index_label <- attr(data[[ind_name]], "label")

    g4 <- data %>%
      ggplot(
        aes(
          x = .data[[ind_name]],
          y = reorder(COUNTRY, desc(.data[[ind_name]]), mean),
          group = COUNTRY,
          fill = COUNTRY
        )
      ) +
      geom_violin(aes(weight = SENWGT_Gc1), alpha = 0.5) +
      geom_violin(aes(weight = SENWGT_Gc1), data = data_total, alpha = 0.5) +
      geom_boxplot(aes(weight = SENWGT_Gc1), width = 0.1) +
      geom_boxplot(aes(weight = SENWGT_Gc1), data = data_total, width = 0.1) +
      geom_vline(
        aes(xintercept = .data[[ind_name]]),
        data = mg, linetype = "dotted", size = 0.8
      ) +
      geom_vline(
        aes(xintercept = overall_mean),
        data = total_mean, linetype = "dotted", size = 0.8
      ) +
      facet_grid(. ~ GENDER) +
      ggtitle(str_wrap(tolower(index_label), 60)) +
      ylab("Distribution of derived scale") +
      xlab(paste0(ind_name, "- CIVED1999")) +
      scale_color_brewer(palette = "Accent") +
      theme(legend.position = "none")
    print(g4)
  }
}
# ---- ICCS 2009 ----
# Writes markdown headings with cat() and prints, for every construct found in
# the ICCS 2009 item list, one weighted bar chart per item followed by a
# violin/box plot of the derived index taken from the same position.
ha <- items %>% filter(name == "ICCS_2009")
constructs <- unique(ha$Construct)

# Index variable names of this cycle with incomplete rows removed.
# Loop-invariant, so computed once.
ind_name1 <- index1 %>% filter(name == "ICCS_2009") %>% na.omit()

cat(" \n")
cat(" \n")
cat("## ICCS 2009 \n")
cat(" \n")
cat(" \n")

# seq_along() keeps the loop from running when there are no constructs
# (1:length(x) would iterate over c(1, 0)).
for (j in seq_along(constructs)) {
  cat(" \n")
  cat(" \n")
  cat(paste0("### Construct: ", constructs[j], " \n"))
  cat(" \n")
  cat(" \n")

  # Item variable names of this construct; rows with any NA are dropped.
  hl <- ha %>% filter(Construct == constructs[j]) %>% na.omit()
  constr_name <- hl$value

  cat(" \n")
  cat(" \n")
  cat(paste("**Items:", paste(constr_name, collapse = ", "), "** \n"))
  cat(" \n")
  cat(" \n")

  for (i in seq_along(constr_name)) {
    item_var <- constr_name[i]

    data <- ISC_desc %>%
      dplyr::select(COUNTRY, SENWGT_Gc2, all_of(item_var), SGENDER) %>%
      mutate_at(c(item_var, "SGENDER"), as_factor) %>%
      na.omit()

    # Copy of the data with SGENDER overwritten, used to add a "Total" facet.
    data_total <- transform(data, SGENDER = "Total")

    # Read the variable label directly instead of eval(parse(text = ...)).
    item_label <- attr(data[[item_var]], "label")

    g2 <- data %>%
      ggplot(
        aes(
          x = .data[[item_var]],
          y = ..prop..,
          group = COUNTRY,
          fill = COUNTRY
        )
      ) +
      geom_bar(aes(weight = SENWGT_Gc2), alpha = 0.5) +
      geom_bar(aes(weight = SENWGT_Gc2), data = data_total, alpha = 0.5) +
      facet_grid(COUNTRY ~ SGENDER, switch = "y") +
      scale_fill_discrete(guide = "none") +
      ggtitle(str_wrap(tolower(item_label), 60)) +
      ylab("Weighted proportion of responses") +
      xlab(paste0(item_var, "- ICCS2009")) +
      geom_text(
        aes(label = scales::percent(..prop.., accuracy = 0.1), y = ..prop..),
        stat = "count", vjust = -.5, size = 2
      ) +
      geom_text(
        data = data_total,
        aes(label = scales::percent(..prop.., accuracy = 0.1), y = ..prop..),
        stat = "count", vjust = -.5, size = 2
      ) +
      scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
      scale_x_discrete(labels = function(x) str_wrap(tolower(x), 10)) +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
    print(g2)
  }

  cat(" \n")
  cat(" \n")
  cat(paste("**Index:", paste(unique(index$D2)[j], collapse = ", "), "** \n"))
  cat(" \n")
  cat(" \n")

  # NOTE(review): the index variable is matched to the construct purely by
  # position j; the `C` column of index1 is not consulted. Confirm that both
  # orderings agree.
  ind_name <- ind_name1$value[j]

  data <- ISC_desc %>%
    dplyr::select(COUNTRY, SENWGT_Gc2, all_of(ind_name), SGENDER) %>%
    mutate_at(c("SGENDER"), as_factor) %>%
    na.omit()
  data_total <- transform(data, SGENDER = "Total")

  # Weighted mean over all rows, and weighted mean per gender.
  me <- weighted.mean(data[[ind_name]], data$SENWGT_Gc2)
  mg <- data %>%
    dplyr::select(SGENDER, SENWGT_Gc2, all_of(ind_name)) %>%
    group_by(SGENDER) %>%
    summarise_at(ind_name, list(~ weighted.mean(., SENWGT_Gc2))) %>%
    mutate(SGENDER = as.factor(SGENDER)) %>%
    data.frame()

  # One-row frame for the "Total" facet so the overall-mean line is drawn
  # once rather than once per observation.
  total_mean <- data.frame(
    SGENDER = "Total",
    overall_mean = me,
    stringsAsFactors = FALSE
  )

  index_label <- attr(data[[ind_name]], "label")

  g5 <- data %>%
    ggplot(
      aes(
        x = .data[[ind_name]],
        y = reorder(COUNTRY, desc(.data[[ind_name]]), mean),
        group = COUNTRY,
        fill = COUNTRY
      )
    ) +
    geom_violin(aes(weight = SENWGT_Gc2), alpha = 0.5) +
    geom_violin(aes(weight = SENWGT_Gc2), data = data_total, alpha = 0.5) +
    geom_boxplot(aes(weight = SENWGT_Gc2), width = 0.1) +
    geom_boxplot(aes(weight = SENWGT_Gc2), data = data_total, width = 0.1) +
    geom_vline(
      aes(xintercept = .data[[ind_name]]),
      data = mg, linetype = "dotted", size = 0.8
    ) +
    geom_vline(
      aes(xintercept = overall_mean),
      data = total_mean, linetype = "dotted", size = 0.8
    ) +
    facet_grid(. ~ SGENDER) +
    ggtitle(str_wrap(tolower(index_label), 60)) +
    ylab("Distribution of derived scale") +
    xlab(paste0(ind_name, "- ICCS_2009")) +
    scale_color_brewer(palette = "Accent") +
    theme(legend.position = "none")
  print(g5)
}
# ---- ICCS 2016 ----
# Writes markdown headings with cat() and prints, for every construct found in
# the ICCS 2016 item list, one weighted bar chart per item followed by a
# violin/box plot of the derived index taken from the same position.
ha <- items %>% filter(name == "ICCS_2016")
constructs <- unique(ha$Construct)

# Index variable names of this cycle with incomplete rows removed.
# Loop-invariant, so computed once.
ind_name1 <- index1 %>% filter(name == "ICCS_2016") %>% na.omit()

cat(" \n")
cat(" \n")
cat("## ICCS 2016 \n")
cat(" \n")
cat(" \n")

# seq_along() keeps the loop from running when there are no constructs
# (1:length(x) would iterate over c(1, 0)).
for (j in seq_along(constructs)) {
  cat(" \n")
  cat(" \n")
  cat(paste0("### Construct: ", constructs[j], " \n"))
  cat(" \n")
  cat(" \n")

  # Item variable names of this construct; rows with any NA are dropped.
  hl <- ha %>% filter(Construct == constructs[j]) %>% na.omit()
  constr_name <- hl$value

  cat(" \n")
  cat(" \n")
  cat(paste("**Items:", paste(constr_name, collapse = ", "), "** \n"))
  cat(" \n")
  cat(" \n")

  for (i in seq_along(constr_name)) {
    item_var <- constr_name[i]

    data <- ISC_desc %>%
      dplyr::select(COUNTRY, SENWGT_Gc3, all_of(item_var), S_GENDER) %>%
      mutate_at(c(item_var, "S_GENDER"), as_factor) %>%
      na.omit()

    # Copy of the data with S_GENDER overwritten, used to add a "Total" facet.
    data_total <- transform(data, S_GENDER = "Total")

    # Read the variable label directly instead of eval(parse(text = ...)).
    item_label <- attr(data[[item_var]], "label")

    # The x aesthetic uses the column name as-is: wrapping the name with
    # str_wrap() before it is parsed as an expression can only leave it
    # unchanged or corrupt it.
    g3 <- data %>%
      ggplot(
        aes(
          x = .data[[item_var]],
          y = ..prop..,
          group = COUNTRY,
          fill = COUNTRY
        )
      ) +
      geom_bar(aes(weight = SENWGT_Gc3), alpha = 0.5) +
      geom_bar(aes(weight = SENWGT_Gc3), data = data_total, alpha = 0.5) +
      facet_grid(COUNTRY ~ S_GENDER, switch = "y") +
      scale_fill_discrete(guide = "none") +
      ggtitle(str_wrap(tolower(item_label), 60)) +
      ylab("Weighted proportion of responses") +
      xlab(paste0(item_var, "- ICCS2016")) +
      geom_text(
        aes(label = scales::percent(..prop.., accuracy = 0.1), y = ..prop..),
        stat = "count", vjust = -.5, size = 2
      ) +
      geom_text(
        data = data_total,
        aes(label = scales::percent(..prop.., accuracy = 0.1), y = ..prop..),
        stat = "count", vjust = -.5, size = 2
      ) +
      scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
      scale_x_discrete(labels = function(x) str_wrap(x, 10)) +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
    print(g3)
  }

  cat(" \n")
  cat(" \n")
  cat(paste("**Index:", paste(unique(index$D3)[j], collapse = ", "), "** \n"))
  cat(" \n")
  cat(" \n")

  # NOTE(review): the index variable is matched to the construct purely by
  # position j; the `C` column of index1 is not consulted. Confirm that both
  # orderings agree.
  ind_name <- ind_name1$value[j]

  data <- ISC_desc %>%
    dplyr::select(COUNTRY, SENWGT_Gc3, all_of(ind_name), S_GENDER) %>%
    mutate_at(c("S_GENDER"), as_factor) %>%
    na.omit()
  data_total <- transform(data, S_GENDER = "Total")

  # Weighted mean over all rows, and weighted mean per gender.
  me <- weighted.mean(data[[ind_name]], data$SENWGT_Gc3)
  mg <- data %>%
    dplyr::select(S_GENDER, SENWGT_Gc3, all_of(ind_name)) %>%
    group_by(S_GENDER) %>%
    summarise_at(ind_name, list(~ weighted.mean(., SENWGT_Gc3))) %>%
    mutate(S_GENDER = as.factor(S_GENDER)) %>%
    data.frame()

  # One-row frame for the "Total" facet so the overall-mean line is drawn
  # once rather than once per observation.
  total_mean <- data.frame(
    S_GENDER = "Total",
    overall_mean = me,
    stringsAsFactors = FALSE
  )

  index_label <- attr(data[[ind_name]], "label")

  g6 <- data %>%
    ggplot(
      aes(
        x = .data[[ind_name]],
        y = reorder(COUNTRY, desc(.data[[ind_name]]), mean),
        group = COUNTRY,
        fill = COUNTRY
      )
    ) +
    geom_violin(aes(weight = SENWGT_Gc3), alpha = 0.5) +
    geom_violin(aes(weight = SENWGT_Gc3), data = data_total, alpha = 0.5) +
    geom_boxplot(aes(weight = SENWGT_Gc3), width = 0.1) +
    geom_boxplot(aes(weight = SENWGT_Gc3), data = data_total, width = 0.1) +
    geom_vline(
      aes(xintercept = .data[[ind_name]]),
      data = mg, linetype = "dotted", size = 0.8
    ) +
    geom_vline(
      aes(xintercept = overall_mean),
      data = total_mean, linetype = "dotted", size = 0.8
    ) +
    facet_grid(. ~ S_GENDER) +
    ggtitle(str_wrap(tolower(index_label), 60)) +
    ylab("Distribution of derived scale") +
    xlab(paste0(ind_name, "- ICCS_2016")) +
    scale_color_brewer(palette = "Accent") +
    theme(legend.position = "none")
  print(g6)
}