forked from jeffcnz/Hilltop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHilltop.R
275 lines (235 loc) · 11.3 KB
/
Hilltop.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
#Functions to interact with a Hilltop server and provide data in R data frames.
# Jeff Cooke May 2016
#Install the required packages
pkgs <- c('XML', 'reshape2', 'plyr', 'RCurl')
if(!all(pkgs %in% installed.packages()[, 'Package']))
install.packages(pkgs, dep = T)
require(XML)
require(reshape2)
require(plyr)
require(RCurl)
#Get a list of sites available from a service
#May be able to tidy this and use similar method to getting measurement data.
hilltopSiteList <- function(sitexml) {
#Takes an parsed xml document from a Hilltop SiteList request.
#Returns a dataframe of the available sites and location if available.
stemp <- do.call(rbind, xpathApply(sitexml, "/HilltopServer/Site", function(node) {
xp <- "./*"
site <- xmlGetAttr(node, "Name")
if(length(xmlChildren(node)) < 2) {
attribute <- "NoLocation"
value <- "NA"
} else {
attribute <- xpathSApply(node, xp, xmlName)
value <- xpathSApply(node, xp, xmlValue)
}
data.frame(site, attribute, value, stringsAsFactors = FALSE)
} ) )
castsite <- dcast(stemp, site ~ attribute, value.var = "value")
if(!is.null(castsite$NoLocation)) {
castsite <- subset(castsite, select = -c(NoLocation) )
}
return(castsite)
}
hilltopDsMeasList <- function(measlistxml) {
#Helper function.
#Takes an xml document from a Hilltop MeasurementList at a Site request.
#Returns a dataframe of the datasource information and measurements names.
dstemp <- do.call(rbind, xpathApply(measlistxml, "/HilltopServer/DataSource", function(node) {
xp <- "./*"
datasource <- xmlGetAttr(node, "Name")
type <- xpathSApply(node, "./TSType", xmlValue)
datasourceid <- paste(type, datasource)
attribute <- xpathSApply(node, xp, xmlName)
value <- xpathSApply(node, xp, function(x) {
if(xmlName(x) == "Measurement") {xmlGetAttr(x, "Name") } else {xmlValue(x) }
} )
data.frame(datasourceid, datasource, attribute, value, stringsAsFactors = FALSE)
} ) )
ds <- subset(dstemp, attribute != "Measurement")
meas <- subset(dstemp, attribute == "Measurement", select = c("datasourceid", "value") )
colnames(meas) [which(names(meas) == "value") ] <- "MeasurementName"
castds <- dcast(ds, datasourceid + datasource ~ attribute, value.var = "value")
castds <- merge(castds, meas, all = TRUE)
castds <- subset(castds, select= -c(datasourceid) )
return(castds)
}
hilltopMeasInfoList <- function(measlistxml) {
#Helper function.
#Takes an xml document from a Hilltop MeasurementList at a Site request.
#Returns a dataframe of the measurement information and datasources.
dstemp <- do.call(rbind, xpathApply(measlistxml, "/HilltopServer/DataSource/Measurement", function(node) {
xp <- "./*"
datasource <- xpathSApply(node, "..", function(x) {xmlGetAttr(x, "Name") } )
MeasurementName <- xmlGetAttr(node, "Name")
measurementid <- paste(datasource, MeasurementName)
attribute <- xpathSApply(node, xp, xmlName)
value <- xpathSApply(node, xp, xmlValue)
data.frame(measurementid, datasource, MeasurementName, attribute, value, stringsAsFactors = FALSE)
} ) )
castmeas <- dcast(dstemp, measurementid + datasource + MeasurementName ~ attribute, value.var = "value")
castmeas <- subset(castmeas, select = -c(measurementid) )
return(castmeas)
}
hilltopDsMeasListFull <- function(measlistxml) {
#Takes an xml document from a Hilltop MeasurementList at a Site request.
#Returns a dataframe of all of the datasource and measurement information combined.
t<-hilltopDsMeasList(measlistxml)
m<-hilltopMeasInfoList(measlistxml)
full<-merge(t, m, by = c("datasource", "MeasurementName") , all = TRUE)
return(full)
}
hilltopMeasInfoListExtra <- function(measlistxml) {
#Helper function.
#Takes an xml document from a Hilltop MeasurementList at a Site request.
#Returns a dataframe of the measurement information and datasource info.
dstemp <- do.call(rbind, xpathApply(measlistxml, "/HilltopServer/DataSource/Measurement", function(node) {
xp <- "./*"
datasource <- xpathSApply(node, "..", function(x) {xmlGetAttr(x, "Name") } )
MeasurementName <- xmlGetAttr(node, "Name")
TSType <- xpathSApply(node, "../TSType", xmlValue)
DataType <- xpathSApply(node, "../DataType", xmlValue)
Interpolation <- xpathSApply(node, "../Interpolation", xmlValue)
From <- xpathSApply(node, "../From", xmlValue)
To <- xpathSApply(node, "../To", xmlValue)
attribute <- xpathSApply(node, xp, xmlName)
value <- xpathSApply(node, xp, xmlValue)
data.frame(datasource, MeasurementName, TSType, DataType, Interpolation, From, To, attribute, value, stringsAsFactors = FALSE)
} ) )
castmeas <- dcast(dstemp, datasource + TSType + DataType + Interpolation + From + To + MeasurementName ~ attribute, value.var = "value")
return(castmeas)
}
hilltopValueHelper <- function(x) {
#Helper function to return the appropriate xml value depending whether the value of interest is from a named node,
#or is a named parameter.
if(xmlName(x) != "T") {
if(xmlName(x) == "Parameter") {
return(xmlGetAttr(x, "Value"))
} else {return(xmlValue(x)) }
}
}
hilltopAttributeHelper <- function(x) {
#Helper function to return the appropriate xml attribute name depending whether the attribute of interest is from a named node,
#or is a named parameter.
if(xmlName(x) != "T") {
if(xmlName(x) == "Parameter") {
return(xmlGetAttr(x, "Name"))
} else {return(xmlName(x)) }
}
}
hilltopMeasurementToDF <- function(dataxml) {
#Helper function that reads the nodes within a the Measurement node of a Hilltop XML response
#from a anyXmlParse(url) request such as dataxml<-anyXmlParse(url).
#Returns a dataframe of the data for each timestamp.
#Handles missing results and doen't require prior knowledge of parameter names.
#Handles true measurements and WQ Sample requests
idNodes <- getNodeSet(dataxml, "//Measurement/Data/E")
Times <- lapply(idNodes, xpathApply, path = "./T", xmlValue)
values <- lapply(idNodes, xpathApply, path = "./*", hilltopValueHelper)
attributes <- lapply(idNodes, xpathApply, path = "./*", hilltopAttributeHelper)
data <- do.call(rbind.data.frame, Reduce(function(x,y) Map(cbind, x, y), list(Times, attributes, values)))
names(data) <- c("Time", "Attribute", "Content")
data <- data[!(data$Attribute == "NULL"), ]
data <- data.frame(lapply(data, as.character), stringsAsFactors = FALSE)
cdata <- dcast(data, Time ~ Attribute, value.var = "Content")
cdata$Time <- as.POSIXct(strptime(cdata$Time, format = "%Y-%m-%dT%H:%M:%S"))
colnames(cdata)[colnames(cdata)=="I1"] <- "Value"
return(cdata)
}
hilltopDataSourceToDF<-function(dataxml) {
#Helper function that reads the nodes within a the DataSource ItemInfo node of a Hilltop XML response
#from a anyXmlParse(url) request such as dataxml<-anyXmlParse(url).
#Returns a dataframe of the Info for each Item.
#Handles missing results and doen't require prior knowledge of the items.
idNodes <- getNodeSet(dataxml, "//Measurement/DataSource")
Item <- lapply(idNodes, xpathApply, path = "./ItemInfo", xmlGetAttr, "ItemNumber")
values <- lapply(idNodes, xpathApply, path = "./ItemInfo/*", hilltopValueHelper)
attributes <- lapply(idNodes, xpathApply, path = "./ItemInfo/*", hilltopAttributeHelper)
data <- data.frame(Attribute = unlist(attributes), Content = unlist(values))
data$Item <- unlist(Item)
data <- data[!(data$Attribute == "NULL"), ]
data <- data.frame(lapply(data, as.character), stringsAsFactors = FALSE)
cdata <- dcast(data, Item ~ Attribute, value.var = "Content")
return(cdata)
}
hilltopMeasurement<-function(dataxml){
#Main function that converts a Hilltop XML document
#from a anyXmlParse(url) request such as dataxml<-anyXmlParse(url)
#for a water quality measurement into a
#dataframe that contains the measurement information.
#It returns a dataframe of the data for each timestamp, including DataSource Information and the Site Name.
#This dataframe can be merged with a WQ Sample dataframe processed using xmlHilltopMeasurementToDF
Site <- dataxml[["string(//Measurement/@SiteName)"]]
df <- hilltopMeasurementToDF(dataxml)
df$Site <- Site
items <- hilltopDataSourceToDF(dataxml)
df$Measurement <- items$ItemName
df$Units <- if(is.null(items$Units)) {c("")} else {items$Units}
return(df)
}
is.hilltopXml <- function(xmldata){
#checks if an xml document is hilltop xml, returns True or False accordingly.
server <- xmlName(xmlRoot(xmldata))
if(length(grep("Hilltop",server))>0) {
return(TRUE)
} else {
return(FALSE)
}
}
anyXmlParse <- function(url) {
#Helper function to parse data from a hilltop server.
#Takes a valid url as an input and returns a parsed xml document ready for other functions.
#Handles https requests as well as http
if(length(grep("https",url))>0){
doc <- getURL(url, ssl.verifypeer = FALSE)
return(xmlParse(doc))
} else {return(xmlParse(url))}
}
hilltopEnsembleStatBkgnd<-function(dataxml){
#Helper Function that takes the parsed xml from an Ensemble Statistics request.
#Returns a single line dataframe of the STatistics background information
#This needs to be combined with the stats themselves to get a full dataframe.
bgtemp<-do.call(rbind, xpathApply(dataxml, "/HilltopServer", function(node) {
xp <- "./*"
attribute<-xpathSApply(node, xp, xmlName)
value<-xpathSApply(node, xp, function(x){
if(xmlName(x) %in% c("Hour", "Day", "Month")){xmlGetAttr(x,"Name")}else{xmlValue(x)}
})
data.frame(attribute, value, stringsAsFactors = FALSE)
}))
bgtemp<-subset(bgtemp, !attribute %in% c("Hour", "Day", "Month"))
fintemp = setNames(data.frame(t(bgtemp[,-1])), bgtemp[,1])
return(fintemp)
}
hilltopEnsembleStatByTimePeriod<-function(dataxml){
#Helper function that takes parsed xml from an EnsembleStats Request.
#Returns the statistics for each time period (depending whether hourly, monthly or annual stats)
period <- function(dataxml) {
#Helper function to determine what the measurement period of the EnsembleStats is.
if (length(xpathApply(dataxml, "/HilltopServer/Hour", xmlGetAttr, "Hour"))>0) {
return("Hour")} else if
(length(xpathApply(dataxml, "/HilltopServer/Day", xmlGetAttr, "Day"))>0) {
return("Day")} else if (length(xpathApply(dataxml, "/HilltopServer/Month", xmlGetAttr, "Month"))>0) {
return("Month")}
}
estatperiod<-period(dataxml)
#Get the stats for each time period entry
Statistic <- xpathApply(dataxml, "/HilltopServer/Statistic", xmlValue)
estat<-do.call(rbind, xpathApply(dataxml, paste("/HilltopServer/",estatperiod, sep=""), function(node) {
xp <- "./*"
periodID <- xpathSApply(node, ".", function(x){xmlGetAttr(x, "Name")})
attribute<-xpathSApply(node, xp, xmlName)
value<-xpathSApply(node, xp, xmlValue)
data.frame(periodID, attribute, value, stringsAsFactors = FALSE)
}))
estest<-dcast(estat, periodID ~ attribute, value.var= "value")
estest$Statistic <- Statistic
return(estest)
}
hilltopEnsembleStatFull <- function(dataxml) {
#Takes the parsed xml from an EnsembleStats Request.
#Returns a dataframe of the statistics for the period, along with the background information such as site measurement units etc.
bg <- hilltopEnsembleStatBkgnd(dataxml)
pe <- hilltopEnsembleStatByTimePeriod(dataxml)
full<-merge(bg, pe, by = c("Statistic"), all = TRUE)
}