# 01_data_pull.R
# PHOSP-Covid REDCap database analysis: API pull
# API pull from Leicester Oxford REDCap server
# Centre for Medical Informatics, Usher Institute, University of Edinburgh 2020
# To use this, set your REDCap API token as an environment variable.
## Uncomment and run the following line:
# usethis::edit_r_environ()
## this opens up .Renviron, add your token, e.g. phosp_token = 2F3xxxxxxxxxxxxE0111
## Restart R
# 1. API pull
# 2. Apply REDCap R formatting, file edited.
# 3. Final object created: phosp
# Libraries
library(RCurl)
library(tidyverse)
library(REDCapR)
# Functions for safe api pull
# Shared retry policy: at most 10 attempts, with the pause between attempts
# capped at 5 minutes (300 seconds).
rate <- rate_backoff(max_times = 10, pause_cap = 5 * 60)

# Retrying wrappers around the two API calls made further down the script.
insistent_postForm <- purrr::insistently(f = postForm, rate = rate)
insistent_redcap_read <- purrr::insistently(f = redcap_read, rate = rate)
# Split a vector into consecutive chunks of at most `.n` elements.
#
# .vector: vector to split.
# .n:      maximum chunk size (default 200).
#
# Returns a list of chunks in their original order, named "1", "2", ...;
# the last chunk may hold fewer than `.n` elements.
batch <- function(.vector, .n = 200) {
  chunk_id <- ceiling(seq_along(.vector) / .n)
  split(.vector, chunk_id)
}
# Get subjid
# Fail fast when the API token is missing: Sys.getenv() returns "" for an
# unset variable, and the insistent wrapper would otherwise retry the doomed
# request up to 10 times with pauses of up to 5 minutes between attempts.
if (!nzchar(Sys.getenv("phosp_token"))) {
  stop(
    "Environment variable `phosp_token` is not set; ",
    "add it to .Renviron and restart R.",
    call. = FALSE
  )
}

# Export only the `study_id` field for every record (flat CSV, raw values),
# parse the CSV response, and reduce it to a vector of unique study ids.
# These ids drive the batched full pull below.
subjid <- insistent_postForm(
  uri = "https://data.phosp.org/api/",
  token = Sys.getenv("phosp_token"),
  content = "record",
  "fields[0]" = "study_id",
  format = "csv",
  type = "flat",
  rawOrLabel = "raw",
  rawOrLabelHeaders = "raw",
  exportCheckboxLabel = "false",
  exportSurveyFields = "false",
  exportDataAccessGroups = "false",
  returnFormat = "json"
) %>%
  read_csv() %>%
  distinct(study_id) %>%
  pull(study_id)
# Get data in batches
# Pull the full record set one batch of study ids at a time and row-bind the
# `data` element of each REDCapR result into a single data frame.
# NOTE(review): per the REDCapR docs, guess_type = FALSE should return every
# column as character, leaving type guessing to a later step -- confirm.
data_raw <- map_df(
  batch(subjid),
  function(ids) {
    pulled <- insistent_redcap_read(
      redcap_uri = "https://data.phosp.org/api/",
      export_data_access_groups = TRUE,
      token = Sys.getenv("phosp_token"),
      records = ids,
      guess_type = FALSE
    )
    pulled$data
  }
)
# Re-guess column types over the combined pull and store the result as a
# tibble. The object is deliberately named `data`.
# NOTE(review): presumably the formatting script sourced below expects an
# object with this name -- confirm before renaming.
data <- as_tibble(type_convert(data_raw))
# Formatting
## Update 11/05/2021
## Earlier versions of the formatting script, kept commented out for reference:
#source("/home/eharrison/phosp_clean/PHOSPCOVID19FU_R_2021-02-10_1805.r")
#source("/home/eharrison/phosp_clean/PHOSPCOVID19FU_R_2021-05-11_0708.r")
## Run the formatting script currently in use. The path is absolute, so it
## only resolves on a machine where this file exists.
## NOTE(review): presumably this is the edited REDCap-generated R script and it
## modifies the global `data` object created above -- confirm.
source("/home/eharrison/phosp_clean/PHOSPCOVID19FU_R_2021-06-30_1749.r")
# Out object and clean
# Remove empty columns: `phosp` is `data` without the columns whose values
# are all NA.
phosp <- purrr::discard(data, function(column) all(is.na(column)))

# Drop the helpers that were only needed for the API pull. `data_raw` and
# `data` are not removed here.
rm(subjid, batch, rate, insistent_postForm, insistent_redcap_read)