# set working directory to the folder containing this script
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# load required packages
library(tidyverse)
library(rvest)
library(janitor)
library(lubridate)
library(riem)
library(jsonlite)
library(sp)
library(fuzzyjoin)
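# note: the raster package must also be installed; it is called below via raster::pointDistance()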
#############
# load and process data from wastewater permit search
tx <- read_csv("tx/tx_dairies_feedlots.csv")
names(tx) <- c("auth_no","permittee","sic_code","segment_no","county","region","city","site_location")
glimpse(tx)
# process urls into separate column
tx <- tx %>%
  separate(auth_no, into = c("auth_no", "url"), sep = " ") %>%
  mutate(url = str_replace_all(url, "\\(|\\)", ""))
# scrape the detailed information for each permit
tx_detail <- tibble()
errors <- c()
for (u in tx$url) {
  tryCatch(
    {
      lines <- read_html(u) %>%
        html_nodes("p") %>%
        html_text() %>%
        str_squish()
      text <- paste(lines, sep = " ", collapse = " ")
      digester <- as.character(grepl("digester", text, ignore.case = TRUE))
      auth_no <- str_subset(lines, "Permit Number")
      auth_no <- str_split(auth_no, ":\\s*", simplify = TRUE)[, 2]
      print(auth_no)
      name <- str_subset(lines, "Site Name on Permit")
      name <- str_split(name, ":\\s*", simplify = TRUE)[, 2]
      animal_type <- str_subset(lines, "animal type")
      animal_type <- str_split(animal_type, ":\\s*", simplify = TRUE)[, 2]
      latitude <- str_subset(lines, "Latitude")[1]
      latitude <- str_split(latitude, ":\\s*", simplify = TRUE)[, 2]
      longitude <- str_subset(lines, "Longitude")[1]
      longitude <- str_split(longitude, ":\\s*", simplify = TRUE)[, 2]
      tmp <- tibble(auth_no = auth_no, name = name, animal_type = animal_type, latitude = latitude, longitude = longitude, digester = digester)
      tx_detail <- bind_rows(tx_detail, tmp)
      rm(tmp, lines, text, auth_no, name, animal_type, latitude, longitude, digester)
    },
    error = function(e) {
      # use <<- so the failed URL is recorded in the errors vector created before the loop
      errors <<- c(errors, u)
    }
  )
}
rm(u)
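# optional: review any URLs that failed to scrape (filled in by the error handler above)
if (length(errors) > 0) print(errors)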
# keep cattle operations only and parse head counts from the animal_type text
# (the code assumes the first number is the total head count and the second, when present, is milking cows)
tx_cattle <- tx_detail %>%
  unique() %>%
  filter(grepl("CATTLE", animal_type)) %>%
  rowwise() %>%
  mutate(cattle = str_extract_all(animal_type, "\\d+"),
         total = as.integer(cattle[1]),
         milking = as.integer(cattle[2]),
         milking = case_when(is.na(milking) ~ 0,
                             TRUE ~ milking),
         other = total - milking,
         type = case_when(milking == 0 ~ "Other",
                          TRUE ~ "Dairy"),
         latitude = as.double(latitude),
         longitude = as.double(longitude)) %>%
  ungroup() %>%
  select(-cattle)
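# quick optional check on the parsed data: how many facilities of each type were captured
tx_cattle %>% count(type)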
# get temperature data
# get active weather stations in TX
tx_stations <- riem_stations(network = "TX_ASOS") %>%
  filter(ymd_hms(archive_begin) <= as.Date("2022-01-01") & online == TRUE)
# get the mean temperatures in 2022 for these stations
mean_temps <- tibble()
# function to get mean temperatures for 2022 from Iowa Mesonet
get_mean_temp <- function(x) {
  temperature_data <- riem_measures(
    station = x,
    date_start = "2022-01-01",
    date_end = "2022-12-31")
  return(mean(temperature_data$tmpf, na.rm = TRUE))
}
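# example usage for a single station (the id here is only illustrative; any active TX_ASOS id works):
# get_mean_temp("DFW")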
for (i in unique(tx_stations$id)) {
  print(i)
  mean_temp <- get_mean_temp(i)
  tmp <- tibble(id = i, mean_temp = mean_temp)
  mean_temps <- bind_rows(mean_temps, tmp)
  rm(tmp)
}
# remove any stations with no mean temperature data
mean_temps <- mean_temps %>%
  filter(!is.na(mean_temp))
tx_stations <- inner_join(tx_stations,mean_temps)
# convert tx cattle facilities and tx stations to spatial points data frames
sp_tx_cattle <- SpatialPointsDataFrame(tx_cattle[,c("longitude", "latitude")], tx_cattle)
sp_tx_stations <- SpatialPointsDataFrame(tx_stations[,c("lon", "lat")], tx_stations)
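# note: coordinates are supplied as (longitude, latitude), i.e. (x, y), which is what
# raster::pointDistance() with lonlat = TRUE expects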
# calculate matrix of distances in meters between cattle facilities and TX weather stations
dist_matrix <- raster::pointDistance(sp_tx_cattle, sp_tx_stations, lonlat = TRUE)
# get the index of the closest station to each facility
closest_indices <- apply(dist_matrix, 1, which.min)
# get the distance from each facility to its closest station and convert from meters to miles
distance <- apply(dist_matrix, 1, min)*0.000621371
# filter for the closest station to each facility
closest_stations <- tx_stations[closest_indices,] %>%
  cbind(distance)
# keep the columns we need and convert the mean temperature to Celsius before joining to the TX cattle data
closest_stations <- as_tibble(closest_stations) %>%
  select(id, distance, mean_temp) %>%
  mutate(mean_temp_c = (mean_temp - 32) * 5/9)
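# sanity check: bind_cols() pairs rows by position, so there should be exactly one
# closest-station row per facility, in the same order as tx_cattle
stopifnot(nrow(closest_stations) == nrow(tx_cattle))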
tx_cattle <- bind_cols(tx_cattle, closest_stations)
#############
# calculate methane emissions
# load data for manure conversion factors by temperature
manure <- read_csv("data/manure_emissions_factors.csv")
# difference join to Texas cattle data
tx_cattle <- difference_left_join(tx_cattle, manure,
                                  by = c("mean_temp_c" = "temp"),
                                  max_dist = 0.5)
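# difference_left_join keeps every facility and attaches the manure factor row(s) whose temp is
# within 0.5 C of the facility's mean_temp_c (this assumes the factor table lists temperatures in
# 1-degree steps, so each facility matches a single row)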
# calculate emissions
# divide by 1,000 because the per-head emissions factors are in kg and emissions are reported in metric tons
tx_cattle <- tx_cattle %>%
  mutate(
    name = str_to_title(name),
    enteric_methane = (milking * 138 / 1000) + (other * 64 / 1000),
    manure_methane = (milking * dairy_manure_factor / 1000) + (other * other_manure_factor / 1000),
    total_methane = round(enteric_methane + manure_methane)
  )
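# illustrative arithmetic with hypothetical numbers (not real facility data): 1,000 milking cows and
# 500 other cattle give enteric methane of 1000*138/1000 + 500*64/1000 = 138 + 32 = 170 metric tons,
# before manure methane is added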
write_csv(tx_cattle, "processed_data/tx_cattle.csv", na = "")
#############
# ratio of other cattle to milking cattle at TX dairies (needed for CA estimates, where head counts at dairies exclude these animals)
dairy_ratio_other_milking <- tx_cattle %>%
  filter(type == "Dairy") %>%
  summarize(milking = sum(milking),
            other = sum(other),
            ratio = other / milking)
# there are 1.093614 other cattle for every milking cow at TX dairies
# some manual cleaning here to fix case errors
# Circle T Dairy also reports a beef operation. This has been renamed Circle T to avoid confusion
# import cleaned data and add formatted tooltip for Datawrapper, remove extraneous columns
tx_cattle <- read_csv("processed_data/tx_cattle_case_cleaned.csv") %>%
  select(1:10, 18:20) %>%
  mutate(labeltext = paste0("<b>Name: </b>", name, "</br>",
                            "<b>Type: </b>", type, "</br>",
                            "<b>Methane emissions: </b>", prettyNum(total_methane, big.mark = ","), " metric tons"))
tx_table <- tx_cattle %>%
  arrange(-total_methane) %>%
  select(name, type, total_methane)
# export processed data
write_csv(tx_cattle, "processed_data/tx_cattle.csv", na = "")
write_csv(tx_table, "processed_data/tx_table.csv", na = "")