-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpollutantmean.R
73 lines (67 loc) · 2.28 KB
/
pollutantmean.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
pollutantmean <- function(directory, pollutant, id = 1:332) {
  ## Mean of a pollutant column across a set of monitor files, with
  ## missing readings left out of the average.
  ##
  ## 'directory' is a character vector of length 1 indicating
  ## the location of the CSV files
  ##
  ## 'pollutant' names the column of the combined data to average
  ##
  ## 'id' holds the monitor numbers handed to loadFrame() to pick the
  ## files that are read

  # Stack the requested monitor files into a single data frame.
  readings <- loadFrame(directory, id)

  # Flatten the selected column(s) to a plain vector and let mean() drop
  # the missing entries.
  observed <- unlist(readings[pollutant], use.names = FALSE)
  mean(observed, na.rm = TRUE)
}
loadFrame <- function(d, items) {
  ## Read the CSV files of the requested monitors and stack them into one
  ## data frame.
  ##
  ## 'd' is the directory that holds the monitor files
  ##
  ## 'items' holds the monitor numbers; each one maps to a zero-padded
  ## file name such as "007.csv". Numbers without a matching file in 'd'
  ## are skipped.
  ##
  ## Return a data frame holding the rows of every file that was found,
  ## in the order the monitor numbers were given. Each file name is
  ## reported with message() as it is read.
  ##
  ## Stops with an error when none of the requested files exists (this
  ## includes an empty 'items').

  # Build all candidate paths in one vectorised step, then keep only the
  # ones that are actually on disk.
  paths <- file.path(d, sprintf("%03d.csv", items))
  paths <- paths[file.exists(paths)]

  if (length(paths) == 0) {
    stop(
      "no monitor files found in '", d, "' for the requested ids",
      call. = FALSE
    )
  }

  # Read every file first and bind once at the end; binding inside the
  # loop would copy the accumulated rows again for each new file.
  frames <- lapply(paths, function(path) {
    message(basename(path))
    read.csv(path, header = TRUE)
  })
  do.call(rbind, frames)
}
complete <- function(directory, id = 1:332) {
  ## Count, per monitor, the rows in which both pollutant readings are
  ## present.
  ##
  ## 'directory' is a character vector of length 1 indicating
  ## the location of the CSV files
  ##
  ## 'id' is an integer vector indicating the monitor ID numbers
  ## to be used
  ##
  ## Return a data frame with one row per distinct value of the 'ID'
  ## column found in the loaded data and two columns:
  ##   id    the monitor ID number
  ##   nobs  the number of rows where neither 'sulfate' nor 'nitrate'
  ##         is missing

  readings <- loadFrame(directory, id)

  # 1 for a row with both readings present, 0 otherwise.
  has_both <- as.numeric(complete.cases(readings[c("sulfate", "nitrate")]))

  # Add the flags up within each monitor.
  tally <- aggregate(has_both, by = list(id = readings[["ID"]]), FUN = sum)
  names(tally) <- c("id", "nobs")
  tally
}
corr <- function(directory, threshold = 0) {
  ## Correlation between sulfate and nitrate for every monitor that has
  ## enough completely observed rows.
  ##
  ## 'directory' is a character vector of length 1 indicating
  ## the location of the CSV files
  ##
  ## 'threshold' is a numeric vector of length 1 indicating the
  ## number of completely observed observations (on all
  ## variables) required to compute the correlation between
  ## nitrate and sulfate; the default is 0
  ##
  ## Return a numeric vector of correlations, one entry per qualifying
  ## monitor in the order reported by complete(). The entry is NA for a
  ## monitor with fewer than two complete sulfate/nitrate pairs, and the
  ## result is numeric(0) when no monitor qualifies.

  # Per-monitor counts of complete rows, over complete()'s default ids.
  counts <- complete(directory)

  # NOTE(review): the comparison is inclusive, as in the original code;
  # confirm whether "strictly greater than threshold" is intended.
  eligible <- counts$id[counts$nobs >= threshold]
  if (length(eligible) == 0) {
    return(numeric(0))
  }

  # Load the qualifying monitors once and keep only the rows where both
  # readings are present.
  # Assumes the 'ID' column of a file matches the number in its file
  # name -- TODO confirm against the data.
  readings <- loadFrame(directory, eligible)
  both_present <- complete.cases(readings[c("sulfate", "nitrate")])
  paired <- readings[both_present, , drop = FALSE]

  vapply(
    eligible,
    function(monitor) {
      rows <- paired[paired$ID == monitor, , drop = FALSE]
      # A correlation needs at least two pairs.
      if (nrow(rows) < 2) {
        return(NA_real_)
      }
      cor(rows$sulfate, rows$nitrate)
    },
    numeric(1)
  )
}