-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAnalysis4Abstract.R
155 lines (129 loc) · 8.19 KB
/
Analysis4Abstract.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
####################
######## BACKGROUND
####################
# #SURVEY
# This script concerns the survey entitled “Survey on data access conditions for sensitive data”.
# This survey was performed by The Dutch Open Data Infrastructure for Social Science and Economic Innovations (ODISSEI) and DANS, the Dutch national centre of expertise and repository for research data.
# The survey was launched in April 2024 and open until July 1st 2024.
# The goal of the survey was to gather additional information about access conditions and restrictions used by researchers using the DANS and ODISSEI services.
# #DATA
# The raw data that was used as input for this script is archived on Zenodo: https://doi.org/10.5281/zenodo.12805137
# It is under embargo until December 2024 as we are still in the process of analysing the entire dataset.
# #ANALYSIS BELOW
# The analysis below was performed for a conference submission.
# It mainly concerns computing basic counts of our answers as well as creating plots to visualize the results of the closed ended questions.
####################
######## ANALYSIS
####################
######## LOAD PACKAGES
library (tidyverse)
library (here)
library (readODS)
library (skimr)
library(dplyr)
library(ggpubr)
######## LOAD AND PREPARE DATA
# Read the raw survey export (an ODS spreadsheet) from the project's Data folder.
RawData <- read_ods(here("Data", "SurveyDataAccessConditions_RawResults.ods"))
# The first three rows of the imported sheet are discarded before analysis.
# NOTE(review): an earlier comment here spoke of "the first two rows" and of
# the variables starting in row 3, but three rows (1, 2 and 3) are removed.
# Confirm against the raw file which of the two is intended.
HeaderRows <- c(1, 2, 3)
# This analysis only covers the closed questions plus Q1 (which can be recoded),
# so every other column is dropped by position.
UnusedColumns <- c(3, 7, 8, 9, 11, 13, 15, 17, 19, 21, 23, 24, 25, 26, 28, 29, 30)
# Drop the unwanted rows and columns in a single subsetting step.
Data <- RawData[-HeaderRows, -UnusedColumns]
# Short variable names for the 13 retained columns, in column order.
# They are defined once and reused for the Labels lookup table, its row names
# and the column names of Data, so the three can never drift apart.
ShortNames <- c(
  "Position", "Organisation", "Country", "ArchivedAtDANS",
  "ManageRestrictedData", "OtherThanResearch", "Students", "Teaching",
  "LimitedIndividuals", "Commercial", "LimitedRegion", "MotivationRequired",
  "Costs"
)
# Question identifiers matching ShortNames position by position.
QuestionIds <- c(
  "Q1", "Q2", "Q3", "Q4", "Q5", "Q9", "Q10", "Q11", "Q12", "Q13", "Q14",
  "Q15", "Q17"
)
# Fail early if the column selection no longer matches the names: renaming
# with a vector of the wrong length would mislabel or leave unnamed columns.
stopifnot(
  length(QuestionIds) == length(ShortNames),
  ncol(Data) == length(ShortNames)
)
# Lookup table to refer back to before the variables are renamed:
# q_text is the (transposed) first row of Data as it stands at this point,
# q_id the question number and q_shortname the short variable name.
# NOTE(review): whether the first row of Data holds the question wording or
# already a respondent's answers depends on the raw file layout - confirm.
Labels <- data.frame(
  q_text = t(Data[1, ]),
  q_id = QuestionIds,
  q_shortname = ShortNames
)
# Name the rows of Labels after the short names used for the Data columns.
row.names(Labels) <- ShortNames
# Rename the variables in Data to the short names so they are easier to refer to.
names(Data) <- ShortNames
# Closed questions that offer "It depends (please elaborate below)" as an answer.
DependsColumns <- c(
  "OtherThanResearch", "Students", "Teaching",
  "LimitedIndividuals", "Commercial", "LimitedRegion"
)
Data <- Data %>%
  mutate(
    # Several spellings of "the Netherlands" occur in the answers; group them
    # under one label.
    Country = recode(
      Country,
      "Netherlands" = "The Netherlands",
      "the Netherlands" = "The Netherlands",
      "NL" = "The Netherlands",
      "Nederland" = "The Netherlands",
      "netherlands" = "The Netherlands"
    ),
    # Shorten the long "It depends" answer in every column that offers it.
    across(
      all_of(DependsColumns),
      function(answer) {
        recode(answer, "It depends (please elaborate below)" = "Depends")
      }
    ),
    # Two questions also carry a long "Yes" answer; shorten it in those columns
    # only. mutate() evaluates its arguments in order, so these steps see the
    # columns as already recoded above.
    LimitedIndividuals = recode(
      LimitedIndividuals,
      "Yes (please elaborate below - what type of organisation(s) / individual(s))" = "Yes"
    ),
    LimitedRegion = recode(
      LimitedRegion,
      "Yes (please elaborate below - which region?)" = "Yes"
    )
  )
# Replace the values in Position with the recoded values, which are read
# from a separate CSV file.
repQ1 <- read.csv(here("Data/RecodedData", "Q1 - Recoding - Sheet1.csv"))
# Guard the assignment below. Without these checks a missing column would
# yield NULL, and assigning NULL deletes Data$Position (shifting every later
# column position); a shorter vector may be recycled silently.
if (!"Coded.Response" %in% names(repQ1)) {
  stop("Column 'Coded.Response' not found in the Q1 recoding file.")
}
if (nrow(repQ1) != nrow(Data)) {
  stop(
    "Q1 recoding file has ", nrow(repQ1), " rows but Data has ", nrow(Data),
    "; the recoded values cannot be aligned with the responses."
  )
}
# The recoded values are matched to respondents purely by row position.
Data$Position <- repQ1[["Coded.Response"]]
######## CREATING COUNTS AND PLOTS
# Frequency tables for Q1 (Position), Q2 (Organisation), Q3 (Country) and
# Q4 (ArchivedAtDANS); sort = TRUE lists the most frequent answer first.
Res_Pos <- count(Data, Position, sort = TRUE)
Res_Org <- count(Data, Organisation, sort = TRUE)
Res_Country <- count(Data, Country, sort = TRUE)
Res_AtDANS <- count(Data, ArchivedAtDANS, sort = TRUE)
#Create plots for all other closed questions
# Every column after the first five (Position, Organisation, Country,
# ArchivedAtDANS, ManageRestrictedData) gets a bar chart. Selecting by name
# avoids a hard-wired index-to-column chain and yields an empty set (no loop
# iterations) if there are no such columns.
PlotColumns <- names(Data)[-seq_len(5)]
# Fixed colour per answer so all plots are comparable. The "NA" entry only
# matches a literal "NA" string; real missing values are coloured through
# na.value below.
AnswerColours <- c("No" = "darkred", "Yes" = "darkgreen", "Depends" = "orange", "NA" = "gray")
# Plots are written next to the data, relative to the project root found by
# here(), consistent with how the input files are located. The folder is
# created when it does not exist yet.
OutputDir <- here("Output")
dir.create(OutputDir, showWarnings = FALSE, recursive = TRUE)
for (PlotTitle in PlotColumns) {
  # The short column name doubles as plot title and file name; it is the same
  # value as q_shortname in Labels.
  # To test the output with just one question, run the loop body after e.g.
  # PlotTitle <- "Students"
  plt <- ggplot(Data, aes(x = .data[[PlotTitle]], fill = .data[[PlotTitle]])) +
    geom_bar() +
    scale_fill_manual(values = AnswerColours, na.value = "gray") +
    ggtitle(PlotTitle) +
    # NOTE(review): fixed axis range; a bar with more than 45 answers would
    # fall outside it - confirm this fits the data.
    ylim(0, 45) +
    # Print the count above each bar; vjust is a constant, so it is set
    # outside aes().
    geom_text(stat = "count", aes(label = after_stat(count)), vjust = -1) +
    theme(
      text = element_text(size = 16),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.title = element_blank()
    )
  # Alternative text sizes tried earlier:
  # theme(axis.text=element_text(size=12),
  #       axis.title=element_text(size=14))
  # Objects are not auto-printed inside a for loop, so the plot is handed to
  # ggsave() explicitly instead of relying on the last plot created.
  ggsave(file.path(OutputDir, paste0("Plot_", PlotTitle, ".jpeg")), plot = plt)
}
####################
#THINGS I TRIED EARLIER AND I MAY WANT TO GET BACK TO
####################
#I am leaving this code in here as I may want to reuse part of it later (I am still learning R, so having some old code snippets still available might save me time later)
# ##Creating an overview of the answer categories so we can then easily create a count
# #a2=c("University", "University of Applied Sciences", "Research Institute", "Infrastructure provider", "Other")
# #a4=c("Yes - DANS EASY","Yes - DANS Data Station", "Yes - DataverseNL", "No")
# #a5_15_17=c("Yes", "No")
# #a14=c("Yes (please elaborate below - which region?)","No","It depends (please elaborate below)")
# #a_rest = c("Yes","No","It depends (please elaborate below)")
#
# #Trying to loop through things
# # for (i in 1: length(Labels[,1])) {
# # i=6
# # x=as.name("Country")
# # Data <- Data %>% mutate(x = recode(x, 'Netherlands' = 'The Netherlands', 'the Netherlands' = 'The Netherlands', 'NL' = 'The Netherlands', 'Nederland' = 'The Netherlands', 'netherlands' = 'The Netherlands'))
# #
# #
# # Data <- Data %>% mutate(as.name(names(Data[i])) = recode(as.name(names(Data[i])), 'It depends (please elaborate below)' = 'Depends'))
# #
# #Piechart
# pie5<- ggplot(Data, aes (x=factor(1), fill=ManageRestrictedData)) + geom_bar(width=1) + coord_polar("y") + ggtitle("ManageRestrictedData") + theme(text=element_text(size=15)) +theme(axis.text=element_text(size=15))
# pie5+ scale_fill_manual(values=c("#005773","#00ABAF", "#32ADD6")) + theme(axis.title.x = element_blank(),
# axis.title.y = element_blank(),
# panel.border = element_blank(),
# panel.grid=element_blank(),
# axis.ticks = element_blank(),
# plot.title=element_text(size=14, face="bold")) + ggtitle("ManageRestrictedData") + theme(text=element_text(size=15)) +theme(axis.text=element_text(size=15))
#
#+ geom_point(size=10) + ggtitle("This is the Title") + theme(text=element_text(size=20)) +theme(axis.text=element_text(size=30))
#pie + scale_fill_manual(values=c("#005773","#00ABAF", "#32ADD6"))
#pie + scale_fill_brewer(palette="Set1")
#pie + theme(axis.text.y=element_blank())
#+ geom_point(size=10)
#
# TRYING TO GET ALL PLOTS INTO ONE IMAGE
#templ<- paste("plot",i, sep="")
#assign this plot to the temp variable so we can save them all.
#assign(templ,plt)
#remove templ and plt
#rm(templ)
#rm(plt)
# pltall <- ggarrange(plot5,plot6,plot7,plot8,plot9, plot10, plot11, plot12, ncol = 2, nrow = 4)