-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy path5_createMetadata.r
193 lines (167 loc) · 9.64 KB
/
5_createMetadata.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File: 5_createMetadata.r
# Purpose: to summarize validation data and other information about the
# model and write it to a pdf. This pdf should accompany ALL sharing/showing
# of the SDM map.
# For knitr to work, you need MikTex installed. See http://miktex.org/
# load libraries ----
library(ROCR) #July 2010: order matters, see http://finzi.psych.upenn.edu/Rhelp10/2009-February/189936.html
library(randomForest)
library(knitr)
library(raster)
library(maptools)
library(sf)
library(RColorBrewer)
library(rasterVis)
library(RSQLite)
library(xtable)
library(stringi)
library(tables)
### find and load model data ----
## three lines need your attention. The one directly below (loc_scripts),
## about line 35 where you choose which Rdata file to use,
## and about line 46 where you choose which record to use
setwd(loc_model)
dir.create(paste0(model_species,"/outputs/metadata"), recursive = T, showWarnings = F)
setwd(paste0(model_species,"/outputs"))
load(paste0("rdata/", modelrun_meta_data$model_run_name,".Rdata"))
# get background poly data for the map (study area, reference boundaries)
studyAreaExtent <- st_read(nm_studyAreaExtent, quiet = T) # study area
referenceBoundaries <- st_read(nm_refBoundaries, quiet = T) # name of state boundaries file
r <- dir(path = "model_predictions", pattern = ".tif$",full.names=FALSE)
fileName <- r[gsub(".tif", "", r) == model_run_name]
ras <- raster(paste0("model_predictions/", fileName))
# project to match raster, just in case
studyAreaExtent <- st_transform(studyAreaExtent, as.character(ras@crs))
referenceBoundaries <- st_transform(referenceBoundaries, as.character(ras@crs))
## Get Program and Data Sources info ----
op <- options("useFancyQuotes")
options(useFancyQuotes = FALSE)
db <- dbConnect(SQLite(),dbname=nm_db_file)
SQLquery <- paste("Select lkpModelers.ProgramName, lkpModelers.FullOrganizationName, ",
"lkpModelers.City, lkpModelers.State, lkpSpecies.sp_code ",
"FROM lkpModelers ",
"INNER JOIN lkpSpecies ON lkpModelers.ModelerID=lkpSpecies.ModelerID ",
"WHERE lkpSpecies.sp_code='", model_species, "'; ", sep="")
sdm.modeler <- dbGetQuery(db, statement = SQLquery)
# NOTE: use column should be populated with 1/0 for sources of data used
SQLquery <- paste("SELECT sp.sp_code, sr.ProgramName, sr.State ",
"FROM lkpSpecies as sp ",
"INNER JOIN mapDataSourcesToSpp as mp ON mp.EGT_ID=sp.EGT_ID ",
"INNER JOIN lkpDataSources as sr ON mp.DataSourcesID=sr.DataSourcesID ",
# "WHERE mp.use = 1 ",
"AND sp.sp_code ='", model_species, "'; ", sep="")
sdm.dataSources <- dbGetQuery(db, statement = SQLquery)
sdm.dataSources <- sdm.dataSources[order(sdm.dataSources$ProgramName),]
SQLquery <- paste("SELECT model_end_time date, egt_id, metadata_comments comments",
" FROM tblModelResults ",
"WHERE model_run_name ='", model_run_name, "'; ", sep="")
sdm.customComments <- dbGetQuery(db, statement = SQLquery)
# assume you want the most recently entered comments, if there are multiple entries
if(nrow(sdm.customComments) > 1) {
sdm.customComments <- sdm.customComments[order(sdm.customComments$date, decreasing = TRUE),]
sdm.customComments.subset <- sdm.customComments[1,]
} else {
sdm.customComments.subset <- sdm.customComments
}
## Get threshold information ----
SQLquery <- paste("Select ElemCode, dateTime, cutCode, cutValue, capturedEOs, capturedPolys, capturedPts ",
"FROM tblModelResultsCutoffs ",
"WHERE model_run_name ='", model_run_name, "'; ", sep="")
sdm.thresholds <- dbGetQuery(db, statement = SQLquery)
# filter to only most recent
#uniqueTimes <- unique(sdm.thresholds$dateTime)
#mostRecent <- uniqueTimes[order(uniqueTimes, decreasing = TRUE)][[1]]
#sdm.thresholds <- sdm.thresholds[sdm.thresholds$dateTime == mostRecent,]
# get info about thresholds
SQLquery <- paste("SELECT cutCode, cutFullName, cutDescription, cutCitationShort, cutCitationFull, sortOrder ",
"FROM lkpThresholdTypes ",
"WHERE cutCode IN (",
toString(sQuote(sdm.thresholds$cutCode)),
");", sep = "")
sdm.thresh.info <- dbGetQuery(db, statement = SQLquery)
sdm.thresh.merge <- merge(sdm.thresholds, sdm.thresh.info)
#sort it
sdm.thresh.merge <- sdm.thresh.merge[order(sdm.thresh.merge$sortOrder),]
sdm.thresh.table <- sdm.thresh.merge[,c("cutFullName", "cutValue",
"capturedEOs", "capturedPolys", "capturedPts", "cutDescription")]
names(sdm.thresh.table) <- c("Threshold", "Value", "EOs","Polys","Pts","Description")
sdm.thresh.table$EOs <- paste(round(sdm.thresh.table$EOs/numEOs*100, 1),
"(",sdm.thresh.table$EOs, ")", sep="")
sdm.thresh.table$Polys <- paste(round(sdm.thresh.table$Polys/numPys*100, 1),
"(",sdm.thresh.table$Polys, ")", sep="")
numPts <- nrow(subset(df.full, pres == 1))
sdm.thresh.table$Pts <- paste(round(sdm.thresh.table$Pts/numPts*100, 1),
sep="")
# get grank definition
SQLquery <- paste0("SELECT rank, rankname FROM lkpRankDefinitions where rank = '",ElementNames$rounded_g_rank,"';", sep="")
grank_desc <- dbGetQuery(db, SQLquery)
# get Model Evaluation and Use data
SQLquery <- paste("Select spdata_dataqual, spdata_abs, spdata_eval, envvar_relevance, envvar_align, process_algo, process_sens, process_rigor, process_perform, process_review, products_mapped, products_support, products_repo, interative, spdata_dataqualNotes, spdata_absNotes, spdata_evalNotes, envvar_relevanceNotes, envvar_alignNotes, process_algoNotes, process_sensNotes, process_rigorNotes, process_performNotes, process_reviewNotes, products_mappedNotes, products_supportNotes, products_repoNotes, interativeNotes ",
"FROM tblRubric ",
"WHERE model_run_name ='", model_run_name, "'; ", sep="")
sdm.modeluse <- dbGetQuery(db, statement = SQLquery)
sdm.modeluse[sdm.modeluse=="I"] <- "\\cellcolor[HTML]{9AFF99} Ideal"
sdm.modeluse[sdm.modeluse=="A"] <- "\\cellcolor[HTML]{FFFFC7} Acceptable"
sdm.modeluse[sdm.modeluse=="P"] <- "\\cellcolor[HTML]{FD6864} Problematic"
# Get env. var lookup table
SQLquery <- paste0("SELECT gridName g from tblModelResultsVarsUsed where model_run_name = '",
model_run_name, "' and inFinalModel = 1;")
var_names <- dbGetQuery(db, SQLquery)$g
SQLquery <- paste("SELECT fullName, description ",
"FROM lkpEnvVars ",
"WHERE gridName COLLATE NOCASE IN (",
toString(sQuote(var_names)),
") ORDER BY fullName;", sep = "")
sdm.var.info <- dbGetQuery(db, statement = SQLquery)
names(sdm.var.info) <- c("Variable Name","Variable Description")
# get Model Evaluation and Use data
SQLquery <- paste("Select spdata_dataqual, spdata_abs, spdata_eval, envvar_relevance, envvar_align, process_algo, process_sens, process_rigor, process_perform, process_review, products_mapped, products_support, products_repo, interative, spdata_dataqualNotes, spdata_absNotes, spdata_evalNotes, envvar_relevanceNotes, envvar_alignNotes, process_algoNotes, process_sensNotes, process_rigorNotes, process_performNotes, process_reviewNotes, products_mappedNotes, products_supportNotes, products_repoNotes, interativeNotes ",
"FROM tblRubric ",
"WHERE model_run_name ='", model_run_name, "'; ", sep="")
sdm.modeluse <- dbGetQuery(db, statement = SQLquery)
sdm.modeluse[sdm.modeluse=="I"] <- "\\cellcolor[HTML]{9AFF99} Ideal"
sdm.modeluse[sdm.modeluse=="A"] <- "\\cellcolor[HTML]{FFFFC7} Acceptable"
sdm.modeluse[sdm.modeluse=="P"] <- "\\cellcolor[HTML]{FD6864} Problematic"
# escape symbols for latex
ls <- c("&","%","$","#","_","{","}")
for (l in ls) {
sdm.var.info$`Variable Name` <- gsub(l, paste0("\\",l), sdm.var.info$`Variable Name`, fixed = T)
sdm.var.info$`Variable Description` <- gsub(l, paste0("\\",l), sdm.var.info$`Variable Description`, fixed = T)
}
# replace degree symbols
for (l in 1:length(sdm.var.info$`Variable Description`)) {
new.desc <- stri_escape_unicode(sdm.var.info$`Variable Description`[l])
if (grepl("\\u00b0",new.desc, fixed = T))
sdm.var.info$`Variable Description`[l] <- gsub("\\u00b0", "$^\\circ$", new.desc, fixed = T)
}
# put descriptions in parboxes for multiple lines
sdm.var.info$`Variable Description` <- paste0("\\parbox{20cm}{",sdm.var.info$`Variable Description`,"}")
## Run knitr and create metadata ----
# writing to the same folder as a grid might cause problems.
# if errors check that first
# more explanation: tex looks for and uses aux files, which are also used
# by esri. If there's a non-tex aux file, knitr bails.
# Also, might need to run this twice. First time through tex builds the reference
# list, second time through it can then number the refs in the doc.
setwd("metadata")
# knit2pdf errors for some reason...just knit then call directly
knit(paste(loc_scripts,"MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep=""))
call <- paste0("pdflatex -interaction=nonstopmode ",model_run_name , ".tex")
# call <- paste0("pdflatex -halt-on-error -interaction=nonstopmode ",model_run_name , ".tex") # this stops execution if there is an error. Not really necessary
system(call)
system(call) # 2nd run to apply citation numbers
# delete .txt, .log etc if pdf is created successfully.
fn_ext <- c(".log",".aux",".out")
if (file.exists(paste(model_run_name, ".pdf",sep=""))){
#setInternet2(TRUE)
#download.file(fileURL ,destfile,method="auto")
for(i in 1:NROW(fn_ext)){
fn <- paste(model_run_name, fn_ext[i],sep="")
if (file.exists(fn)){
file.remove(fn)
}
}
}
## clean up ----
dbDisconnect(db)
options(op)