-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcovid-datasets-analytics-api.R
65 lines (39 loc) · 1.78 KB
/
covid-datasets-analytics-api.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Install googleAnalyticsR only when it is not already available. Calling
# install.packages() unconditionally re-downloads the package on every run and
# needs network access plus a configured CRAN mirror, which non-interactive
# runs often lack.
if (!requireNamespace("googleAnalyticsR", quietly = TRUE)) {
  install.packages("googleAnalyticsR")
}
library(googleAnalyticsR)
library(tidyr)
# Google Analytics view ID and the reporting window (dates as YYYY-MM-DD).
myid <- 68455797
datestart <- "2020-06-01"
dateend <- "2021-12-08"

# Read the dataset list (generated separately within the repo) and extract the
# vector of dataset IDs (UUIDs, per the original note).
# quote and comment.char are set explicitly because read.table()'s defaults
# treat an apostrophe as a quote character and "#" as the start of a comment;
# either one appearing inside a field would silently corrupt the parsed rows.
datasets <- read.table("COVID-dataset-list.csv",
  header = TRUE, sep = ",", quote = "\"", comment.char = "",
  stringsAsFactors = FALSE
)
# Fail loudly instead of letting a missing column produce an empty ID vector,
# which would make the rest of the script write empty reports without warning.
if (!"id" %in% names(datasets)) {
  stop("COVID-dataset-list.csv has no 'id' column", call. = FALSE)
}
pagex <- datasets[["id"]]
# Query Google Analytics once per dataset ID and collect the monthly figures.
#
# For each ID in `pagex` this requests totalEvents and pageviews by yearMonth,
# filtered to page paths containing that ID, tags the returned rows with the
# ID, and finally stacks everything into `output` (columns: yearMonth,
# totalEvents, pageviews, id).
#
# Results are gathered in a preallocated list and bound once at the end rather
# than seeding `output` with a dummy row and calling rbind() on every pass.
# That leaves the columns in whatever types the API returns and adds no
# placeholder rows to `output`.
results <- vector("list", length(pagex))
for (i in seq_along(pagex)) {
  querypage <- paste0("ga:pagePath=@", pagex[[i]])
  print(querypage)
  ga_rows <- google_analytics(myid,
    date_range = c(datestart, dateend),
    metrics = c("totalEvents", "pageviews"),
    dimensions = c("yearMonth"),
    anti_sample = FALSE,
    filtersExpression = querypage
  )
  # NOTE(review): google_analytics() appears to return NULL (or no rows) when
  # the filter matches nothing -- confirm against the package docs. Either
  # case cannot be combined with the ID column, so the dataset is skipped.
  if (is.null(ga_rows) || nrow(ga_rows) == 0) {
    message("No analytics rows returned for ", pagex[[i]], "; skipping")
    next
  }
  results[[i]] <- data.frame(ga_rows, id = pagex[[i]], stringsAsFactors = FALSE)
}

# Drop the slots of skipped datasets, then bind everything in one call.
results <- Filter(Negate(is.null), results)
if (length(results) > 0) {
  output <- do.call(rbind, results)
} else {
  # Keep `output` a four-column data frame even when nothing was returned, so
  # the later renaming and CSV export still work.
  output <- data.frame(
    yearMonth = character(0),
    totalEvents = numeric(0),
    pageviews = numeric(0),
    id = character(0),
    stringsAsFactors = FALSE
  )
}
# Label the columns, strip placeholder rows, and save the per-dataset results.
# Rows whose id is "FALSE" or "d" are accumulation artifacts rather than real
# datasets, so any that are present are dropped before the file is written.
colnames(output) <- c("year/month", "downloads", "pageviews", "id")
placeholder_ids <- c("FALSE", "d")
keep_row <- is.na(match(output$id, placeholder_ids))
outputclean <- output[keep_row, ]
write.table(
  outputclean,
  file = "covid-datasets-analytics.csv",
  append = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = TRUE,
  fileEncoding = "UTF-8"
)
# Summary report: one row holding a "Total" label, the reporting window, and
# the summed downloads and pageviews across all cleaned rows.
# The metric columns are coerced with as.numeric() before summing because they
# may be stored as text.
total_downloads <- sum(as.numeric(outputclean$downloads))
total_pageviews <- sum(as.numeric(outputclean$pageviews))
overallnumbers <- data.frame(
  total = "Total",
  yearmonth = paste(datestart, "-", dateend),
  downloads = total_downloads,
  pageviews = total_pageviews,
  stringsAsFactors = FALSE
)
# Written with append = TRUE and no header, so each run adds one more row to
# the existing totals file instead of replacing it.
write.table(
  overallnumbers,
  file = "covid-datasets-total-analytics.csv",
  append = TRUE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  fileEncoding = "UTF-8"
)