-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_data_manually.R
72 lines (64 loc) · 3.39 KB
/
check_data_manually.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#==== For Jan ========================================
# Draws a seeded sample of 10 rows of v_pdf_summary with hasXML == TRUE
# (4 "article", 3 "working paper", 3 "report"), writes the sample to CSV and
# renders one copy of generate_list_docs.Rmd per sampled document.
# NOTE(review): v_pdf_summary, v_data_xml / v_data_xml_df and the dplyr verbs
# are not created or attached in this script; they must already be available.
v_dir_checkJan <- file.path("manual check", "check_forJan")
# write.csv2() and file.copy() do not create missing directories.
dir.create(v_dir_checkJan, recursive = TRUE, showWarnings = FALSE)

# Fixed seed so that every run draws the same documents.
set.seed(123456)
v_literature_sampleJan <- rbind(
  v_pdf_summary %>%
    filter(hasXML == TRUE, type == "article") %>%
    sample_n(4),
  v_pdf_summary %>%
    filter(hasXML == TRUE, type == "working paper") %>%
    sample_n(3),
  v_pdf_summary %>%
    filter(hasXML == TRUE, type == "report") %>%
    sample_n(3)
)
write.csv2(
  v_literature_sampleJan,
  file = file.path(v_dir_checkJan, "SDGTM_literature_sample_forJan.csv")
)

# v_data_xml_df is otherwise only assigned in the Jerome section further down;
# build it here when it is missing so this section also runs in a fresh session.
if (!exists("v_data_xml_df")) {
  v_data_xml_df <- do.call("rbind", v_data_xml)
}

# pdf_doc is the base name of `doc` without its last 4 characters
# (presumably the ".xml" extension -- TODO confirm). Only rows whose pdf_doc
# matches a sampled filename are kept.
v_data_xmlforJan1 <- v_data_xml_df %>%
  mutate(pdf_doc = substr(basename(doc), 1, nchar(basename(doc)) - 4)) %>%
  filter(pdf_doc %in% v_literature_sampleJan$filename)

# pdf_doc already holds the stripped base names, so reuse it.
fnames <- unique(v_data_xmlforJan1$pdf_doc)

for (kpdf in fnames) {
  # The report name drops 4 more characters from kpdf
  # (presumably a ".pdf" suffix -- TODO confirm).
  cur_rmd <- file.path(
    v_dir_checkJan,
    paste0(substr(kpdf, 1, nchar(kpdf) - 4), ".Rmd")
  )
  # file.copy() returns FALSE instead of raising an error. An existing copy is
  # left untouched and reused; only a missing target is treated as a failure.
  if (!file.copy("generate_list_docs.Rmd", cur_rmd) && !file.exists(cur_rmd)) {
    stop("Could not copy generate_list_docs.Rmd to ", cur_rmd, call. = FALSE)
  }
  # Document type(s) recorded in the sample for this file.
  cur_dt <- v_literature_sampleJan$type[
    which(v_literature_sampleJan$filename == kpdf)
  ]
  rmarkdown::render(cur_rmd, params = list(
    doctype = cur_dt,
    data = v_data_xmlforJan1 %>% filter(pdf_doc == kpdf)
  ))
}

# Disabled exports, kept for reference.
# write.csv2(v_data_xmlforJan1, file = "SDGTM_xml_structure_forJan.csv")
# v_data_xmlforJan2 <- v_data_xml_df_1doc_per_doc %>%
# mutate(pdf_doc = substr(basename(doc), 1, nchar(basename(doc))-4)) %>%
# filter(pdf_doc %in% v_literature_sampleJan$filename)
# write.csv(v_data_xmlforJan2, file = "SDGTM_xml_collapsed_forJan.csv", quote = TRUE)
#==== For Jerome ========================================
# Draws a second seeded sample of 10 rows of v_pdf_summary with hasXML == TRUE
# (4 "article", 3 "working paper", 3 "report"), excluding every file already in
# v_literature_sampleJan. Writes the sample to CSV and renders one copy of
# generate_list_docs.Rmd per sampled document.
# NOTE(review): v_pdf_summary, v_data_xml, v_literature_sampleJan and the dplyr
# verbs are not created or attached in this section; they must already exist.
v_dir_checkJerome <- file.path("manual check", "check_forJerome")
# write.csv2() and file.copy() do not create missing directories.
dir.create(v_dir_checkJerome, recursive = TRUE, showWarnings = FALSE)

# Fixed seed so that every run draws the same documents.
set.seed(123)

# Candidate pool, built once: rows with hasXML == TRUE whose base file name is
# not part of Jan's sample. paste() coerces the columns to character.
v_pool_Jerome <- v_pdf_summary %>%
  mutate(pdf_doc = basename(paste(filename))) %>%
  filter(!pdf_doc %in% paste(v_literature_sampleJan$filename)) %>%
  filter(hasXML == TRUE)

# Sampling order (article, working paper, report) is kept so that the random
# draws under the seed stay the same.
v_literature_sampleJerome <- rbind(
  v_pool_Jerome %>% filter(type == "article") %>% sample_n(4),
  v_pool_Jerome %>% filter(type == "working paper") %>% sample_n(3),
  v_pool_Jerome %>% filter(type == "report") %>% sample_n(3)
)
write.csv2(
  v_literature_sampleJerome,
  file = file.path(v_dir_checkJerome, "SDGTM_literature_sample_forJerome.csv")
)

# Stack the elements of v_data_xml row-wise into a single object.
v_data_xml_df <- v_data_xml %>%
  do.call("rbind", .)

# pdf_doc is the base name of `doc` without its last 4 characters
# (presumably the ".xml" extension -- TODO confirm). Only rows whose pdf_doc
# matches a sampled filename are kept.
v_data_xmlforJerome1 <- v_data_xml_df %>%
  mutate(pdf_doc = substr(basename(doc), 1, nchar(basename(doc)) - 4)) %>%
  filter(pdf_doc %in% v_literature_sampleJerome$filename)

# pdf_doc already holds the stripped base names, so reuse it.
fnames <- unique(v_data_xmlforJerome1$pdf_doc)

for (kpdf in fnames) {
  # The report name drops 4 more characters from kpdf
  # (presumably a ".pdf" suffix -- TODO confirm).
  cur_rmd <- file.path(
    v_dir_checkJerome,
    paste0(substr(kpdf, 1, nchar(kpdf) - 4), ".Rmd")
  )
  # file.copy() returns FALSE instead of raising an error. An existing copy is
  # left untouched and reused; only a missing target is treated as a failure.
  if (!file.copy("generate_list_docs.Rmd", cur_rmd) && !file.exists(cur_rmd)) {
    stop("Could not copy generate_list_docs.Rmd to ", cur_rmd, call. = FALSE)
  }
  # Document type(s) recorded in the sample for this file.
  cur_dt <- v_literature_sampleJerome$type[
    which(v_literature_sampleJerome$filename == kpdf)
  ]
  rmarkdown::render(cur_rmd, params = list(
    doctype = cur_dt,
    data = v_data_xmlforJerome1 %>% filter(pdf_doc == kpdf)
  ))
}

# Disabled exports, kept for reference.
# write.csv2(v_data_xmlforJerome1, file = "SDGTM_xml_structure_forJerome.csv")
# v_data_xmlforJerome2 <- v_data_xml_df_1doc_per_doc %>%
# mutate(pdf_doc = substr(basename(doc), 1, nchar(basename(doc))-4)) %>%
# filter(pdf_doc %in% v_literature_sampleJerome$filename)
# write.csv2(v_data_xmlforJerome2, file = "SDGTM_xml_collapsed_forJerome.csv")