-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot4.R
72 lines (51 loc) · 3.44 KB
/
plot4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Routine to produce a plot of total PM2.5 emissions throughout the US
# The raw data these routines work with may be found in
# https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2FNEI_data.zip
#
# The location of the data is specified by environment.NEI.data. You can customize code for your
# system by modifying those environmental variables
#
# Prior to executing this function, please make sure you have downloaded and uncompressed the data
# using the appropriate routines.
#
# # make.NEI.plot4 prepares a plot of total PM2.5 emissions across the US broken down by EI Sector
#
make.NEI.plot4 <- function() {
#First we need the SCC codes that relate to Coal combustion.
my.cc.complete <- NEI.ClassCodes()
#We extract the EI Sector data
my.cc.types <- levels(my.cc.complete$EI.Sector)
#Next we find a subset of the Sector data that has the word 'Coal' in it.
my.cc.coal <- my.cc.types[grep(".*Coal.*", my.cc.types) ]
#We can then subset the SCC data frame so that we only have rows related to Coal combustion
my.cc.coalsubset <- subset(my.cc.complete, EI.Sector %in% my.cc.coal)
#Finally we prepare a data frame that we will merge NEI PM25 data
my.cc.premerge <- data.frame(my.cc.coalsubset$SCC,my.cc.coalsubset$EI.Sector)
#We subset the data without storing it, using is the fips code for Baltimore City
my.full.data <- subset( NEI.PM25.data(), SCC %in% my.cc.coalsubset$SCC)
#We now extend the data with the Sector names (using SCC codes as the table join)
my.full.data.extended <- merge(my.full.data,my.cc.premerge, by.x = c("SCC"), by.y = c("my.cc.coalsubset.SCC"))
#The aggregate function behaves like tapply, except we get a dataframe as output
#With col1 containing years, col2 containing types, col3 containing total emissions
plot4.data <- aggregate(my.full.data.extended$Emissions, list(Year = my.full.data.extended$year, Source = my.full.data.extended$type), FUN = sum)
names(plot4.data)[names(plot4.data)=='x'] <- 'Emission'
#Now we turn to the order in which the layers will be stacked in the eventual plot
#We start by getting the total emissions for each sector type over all years
plot4.order.data <- aggregate(my.full.data.extended$Emissions, list(Source = my.full.data.extended$type), FUN = sum)
#We then rename the column x to TotalEmissions so that it's less confusing down the road
names(plot4.order.data)[names(plot4.order.data)=='x'] <- 'TotalEmission'
#Now we add the Total Emissions column to the plot 4 data (we will use it for sorting the data)
plot4.data <- merge(plot4.data,plot4.order.data, by.x = "Source", by.y = "Source")
#Now we sort the data so that when qplot is executed, the smallest emission source goes on the bottom, and larger goes on top
plot4.data <- plot4.data[order(plot4.data$Year,plot4.data$TotalEmission),]
#png("plot2.png")
cplot <- qplot(Year,Emission, data = plot4.data, colour = Source) #Plots Emissions (x) vs Year using different colors
#for the points based on EI.Sector
#Automatically generates the legend as well.
cplot <- cplot + geom_area(aes(colour = Source, fill = Source),position = 'stack') #Stacked layer plot
cplot <- cplot + ylab("Total Emissions") # Labels the y axis (the x is automatically labeles
# with the name of the Year column in the original data frame).
cplot <- cplot + ggtitle("Coal Combustion PM2.5 Emissions Accross U.S.") # Adds a Chart title.
ggsave("plot4.png",cplot)
#dev.off()
}