-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathread_qog.R
160 lines (135 loc) · 6.36 KB
/
read_qog.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# This file is part of the rQog-package (https://github.com/muuankarski/rqog)
# Copyright (C) 2012-2022 Markus Kainu <markuskainu@gmail.com>. All rights reserved.
# This program is open source software; you can redistribute it and/or modify
# it under the terms of the FreeBSD License (keep this notice):
# http://en.wikipedia.org/wiki/BSD_licenses
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#' Retrieve and Read Quality of Government Institute data in R
#'
#' Download a data file from the Quality of Government Institute at
#' \url{http://www.qog.pol.gu.se/data/} and, unless \code{download_only}
#' is \code{TRUE}, import it into R.
#'
#' @param which_data A string. Name of the QoG data set to retrieve. One of
#'   \code{"basic"}, \code{"standard"} (or its alias \code{"std"}) or
#'   \code{"oecd"}. Any other value, including \code{"social_policy"}, is
#'   rejected with an error.
#' @param data_type A string. Either \code{"cross-sectional"} or
#'   \code{"time-series"}.
#' @param year numeric. The publication year of the dataset (not the year of
#'   a particular data point), between 2008 and 2023; no data was published
#'   in 2014. Default \code{2023}.
#' @param data_dir a path to a cache directory. The directory has to exist.
#'   The \code{NULL} (default) uses and creates
#'   'rqog' directory in the temporary directory from
#'   \code{\link{tempdir}}. Only used when \code{cache} is \code{TRUE}.
#' @param file_format A string. The file format to download and import. One
#'   of \code{"csv"}, \code{"dta"}, \code{"sav"} or \code{"xlsx"}. The
#'   \code{"xlsx"} format is only accepted for the latest release year.
#' @param download_only a logical whether to only download and save the
#'   dataset and skip importing in R. Default is \code{FALSE}.
#' @param cache a logical whether to do caching. Default is \code{TRUE}.
#'   With \code{FALSE} the file is always downloaded to a temporary file,
#'   which is removed again after it has been imported.
#' @param update_cache a logical whether to update cache. Default is
#'   \code{FALSE}.
#'
#' @return The imported data (a \code{data.frame} for \code{"csv"}, otherwise
#'   whatever the respective reader returns). When \code{download_only} is
#'   \code{TRUE}, the path of the downloaded file, invisibly.
#'
#' @export
#' @examples # dat <- read_qog(which_data = "basic", year = 2019, data_dir="data")
#' @author Markus Kainu <markus.kainu(at)kapsi.fi>
read_qog <- function(which_data = "basic",
                     data_type = "time-series",
                     year = 2023,
                     data_dir = NULL,
                     file_format = "csv",
                     download_only = FALSE,
                     cache = TRUE,
                     update_cache = FALSE) {
  latest_year <- 2023

  # ---- Argument validation ------------------------------------------------
  # Done up front so that a bad argument is reported as such, before any of
  # the year-specific checks below.
  stopifnot(
    length(which_data) == 1L, length(data_type) == 1L,
    length(year) == 1L, length(file_format) == 1L
  )
  if (!(which_data %in% c("basic", "standard", "std", "oecd"))) {
    stop('Wrong data name, use "basic","standard" or "oecd" instead')
  }
  if (!(data_type %in% c("time-series", "cross-sectional"))) {
    stop('Wrong data type, use "time-series" or "cross-sectional" instead')
  }
  if (!(file_format %in% c("csv", "dta", "sav", "xlsx"))) {
    stop('Wrong file format, use "csv", "dta", "sav" or "xlsx" instead')
  }
  if (!year %in% 2008:latest_year) {
    stop(sprintf("Data has been published between 2008 and %s", latest_year))
  }
  if (year != latest_year && file_format == "xlsx") {
    stop("Only the latest data is available in .xlsx-format. ",
         "Archived data only in .dta, .sav or .csv format")
  }
  if (year == 2014) stop("No data was published in 2014")

  # "std" is an alias; resolve it first so the alias gets exactly the same
  # availability checks and release tags as "standard".
  dataset <- if (which_data == "std") "standard" else which_data
  year_chr <- as.character(year)

  # ---- Release tag --------------------------------------------------------
  # File names carry a release tag in front of the two-digit year. It is
  # "jan" unless one of the older releases listed here is requested.
  month <- "jan"
  if (dataset == "basic") {
    if (year %in% 2008:2011) {
      stop("Basic Data was not published in 2008 - 2011")
    }
    month <- switch(year_chr,
      "2012" = "21may",
      "2013" = "30aug",
      month
    )
  } else if (dataset == "standard") {
    if (year == 2012) stop("Standard Data was not published in 2012")
    month <- switch(year_chr,
      "2008" = "15may",
      "2009" = "17jun",
      "2010" = "27may",
      "2011" = "6apr",
      "2013" = "20dec",
      month
    )
  } else if (dataset == "oecd") {
    if (year %in% 2008:2013) {
      stop("Oecd Data was not published in 2008-2013")
    }
  }

  # ---- File name and URL --------------------------------------------------
  # The latest release and the archived releases use different base URLs.
  url_base <- if (year == latest_year) {
    "http://www.qogdata.pol.gu.se/data/"
  } else {
    "http://www.qogdata.pol.gu.se/dataarchive/"
  }
  dname <- switch(dataset, basic = "bas", standard = "std", oecd = "oecd")
  dtype <- switch(data_type, "cross-sectional" = "cs", "time-series" = "ts")
  # The 2023 Stata files carry an extra suffix in their name.
  postfix <- if (year == 2023 && file_format == "dta") "_stata14" else ""
  file_name <- paste0(
    "qog_", dname, "_", dtype, "_", month, substr(year_chr, 3, 4),
    postfix, ".", file_format
  )
  data_url <- paste0(url_base, file_name)

  # ---- Local destination --------------------------------------------------
  if (cache) {
    if (is.null(data_dir)) {
      cache_dir <- file.path(tempdir(), "rqog")
      if (!dir.exists(cache_dir)) dir.create(cache_dir, recursive = TRUE)
    } else {
      cache_dir <- data_dir
      if (!dir.exists(cache_dir)) {
        stop("The folder ", cache_dir, " does not exist")
      }
    }
    local_file <- file.path(cache_dir, file_name)
  } else {
    # Without caching the file still has to land somewhere to be read.
    local_file <- tempfile(
      pattern = "rqog_",
      fileext = paste0(".", file_format)
    )
  }

  # ---- Download -----------------------------------------------------------
  # Download when caching is off, a refresh is requested, or nothing is
  # cached yet.
  if (!cache || update_cache || !file.exists(local_file)) {
    message(sprintf(
      "Downloading QoG %s data\n from %s\n in file: %s",
      file_name, data_url, local_file
    ))
    # Fetch into a scratch file first so that a failed or partial download
    # never replaces a previously cached copy.
    scratch_file <- tempfile(fileext = paste0(".", file_format))
    on.exit(unlink(scratch_file), add = TRUE)
    # Binary mode keeps the bytes intact on every platform; text mode would
    # corrupt .dta/.sav/.xlsx files on Windows.
    status <- utils::download.file(
      data_url,
      destfile = scratch_file,
      mode = "wb"
    )
    if (!identical(as.integer(status), 0L)) {
      stop("Download of ", data_url, " failed with status ", status)
    }
    if (!file.copy(scratch_file, local_file, overwrite = TRUE)) {
      stop("Could not write the downloaded file to ", local_file)
    }
  }

  if (download_only) {
    return(invisible(local_file))
  }

  # ---- Import -------------------------------------------------------------
  # An uncached download is only needed until it has been read.
  if (!cache) on.exit(unlink(local_file), add = TRUE)
  message(sprintf("Reading file %s", path.expand(local_file)))
  switch(file_format,
    csv = utils::read.csv(local_file, sep = ",", stringsAsFactors = FALSE),
    dta = haven::read_dta(local_file),
    sav = haven::read_sav(local_file),
    xlsx = readxl::read_excel(local_file)
  )
}