-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathEJP-license.R
61 lines (47 loc) · 1.6 KB
/
EJP-license.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Task: list the license and canonical link of every record in the EJP SOIL
# catalogue at https://catalogue.ejpsoil.eu and count the records per license.
# load libraries ----
library(jsonlite)
library(xlsx)
library(dplyr)
# Collect the license and canonical link of every catalogue record ----
#
# Pages through the catalogue's items endpoint, keeps the `license` property
# and the canonical link of each returned feature, and writes two sheets to
# one Excel workbook: "unique" (number of records per license) and "all"
# (one row per record).

# settings ----
items_url <- paste0(
  "https://catalogue.ejpsoil.eu/collections/metadata:main/items",
  "?offset="
)
# the catalog is read in steps of 50 records, and had 577 records in total
# when this script was written
# NOTE(review): the request sets no explicit page size; this assumes the
# server returns (at least) `page_size` records per request - confirm.
page_size <- 50
nmax <- 577
output_file <- "~/ISRIC_Workspace/scratch/diana/licenses.xlsx"

# helpers ----

# Return the href of the canonical link of a single record.
#   record_links: the links table of one feature (a data frame with `type`
#                 and `href` columns).
# Returns NA when the record has no usable canonical link and the first match
# when it has several, so the result is always exactly one string.
canonical_href <- function(record_links) {
  if (!is.data.frame(record_links) ||
      !all(c("type", "href") %in% names(record_links))) {
    return(NA_character_)
  }
  # %in% never yields NA, so links without a type are simply not selected
  href <- record_links$href[record_links$type %in% "canonical"]
  if (length(href) == 0) {
    return(NA_character_)
  }
  as.character(href[[1]])
}

# Turn one parsed catalogue page into a data frame with one row per record
# and the columns `license` and `canonical`.
#   catalog_page: the list returned by fromJSON() for one items request.
# A page without features gives a zero-row data frame; a page whose records
# carry no license field gives NA licenses.
page_records <- function(catalog_page) {
  features <- catalog_page$features
  links <- features$links
  license <- features$properties$license

  n_records <- if (is.data.frame(features)) nrow(features) else length(links)
  if (is.null(links)) {
    links <- vector("list", n_records)
  }
  if (is.null(license)) {
    # no record on this page has a license: one NA per record, so that no
    # row belonging to another page is ever touched
    license <- rep(NA_character_, n_records)
  }
  # fail loudly instead of silently recycling or misaligning values
  stopifnot(
    is.atomic(license),
    length(license) == n_records,
    length(links) == n_records
  )

  data.frame(
    license = as.character(license),
    canonical = vapply(links, canonical_href, character(1), USE.NAMES = FALSE),
    stringsAsFactors = FALSE
  )
}

# harvest all pages ----
offset_i <- seq(from = 0, to = nmax, by = page_size)
pages <- vector("list", length(offset_i))
for (p in seq_along(offset_i)) {
  # format() keeps large offsets out of scientific notation ("1e+05")
  page_url <- paste0(items_url, format(offset_i[p], scientific = FALSE))
  # load JSON catalog from url
  catalog_page <- fromJSON(page_url)

  # If the response reports a total (the `numberMatched` field, when the
  # server provides it), warn once when it no longer agrees with `nmax`.
  total <- catalog_page$numberMatched
  if (p == 1 && is.numeric(total) && length(total) == 1 &&
      !is.na(total) && total != nmax) {
    warning(
      "The catalogue reports ", total, " records but nmax is ", nmax,
      "; update nmax to harvest every record.",
      call. = FALSE
    )
  }

  pages[[p]] <- page_records(catalog_page)
}
# rows are stacked in page order, so a short page leaves no empty rows behind
checks <- bind_rows(pages)

# count uniques ----
uniq_licenses <- checks %>%
  count(license, name = "count")

# write results ----
# as.data.frame() so write.xlsx() always receives a plain data frame,
# whatever class the dplyr verbs return. The first call creates (or
# overwrites) the workbook, the second one appends a sheet to it.
write.xlsx(as.data.frame(uniq_licenses), file = output_file,
           sheetName = "unique")
write.xlsx(as.data.frame(checks), file = output_file,
           sheetName = "all", append = TRUE)