-
Notifications
You must be signed in to change notification settings - Fork 1
/
DJFMP beach seine analyses 2.0.R
342 lines (281 loc) · 11.9 KB
/
DJFMP beach seine analyses 2.0.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# Chinook Drought Response ----
# (possibly applicable to Threadfin Shad, American Shad, Delta Smelt,
# and Longfin Smelt, Striped Bass (Age0) too)
# working with 1976-2020 data (data are incomplete for 2021)
# from EDI Data Portal, downloaded ca Nov 11, 2021
# https://portal.edirepository.org/nis/metadataviewer?packageid=edi.244.8
# Interagency Ecological Program: Over four decades of juvenile fish monitoring
# data from the San Francisco Estuary, collected by the Delta Juvenile Fish
# Monitoring Program, 1976-2021
# DJFMP beach seine data -----
# https://portal.edirepository.org/nis/dataviewer?packageid=edi.244.8&entityid=147fd5e2c7db15913b2ffa44410dc7f9
# prep -----
rm(list = ls(all.names = TRUE)) # clear R environment
gc()
library(tidyverse)
library(readxl)
library(lubridate)
library(chron)
library(magrittr)
library(tsibble)
library(ggplot2)
library(dataRetrieval) # may require R v4.1.1
library(DescTools)
library(emmeans)
# data & set-up -----
yeartypes <- read_csv("yearassignments.csv")
#DJFMP <- read_csv("C:/Users/pnelson/OneDrive - California Department of Water Resources/3-Projects/05-drought monitoring/data/Salmon/1976-2021_DJFMP_beach_seine_fish_and_water_quality_data.csv")
# warnings limited to 352 records where "Expressions" were not as expected:
# ignore, unimportant
# 'massage' raw data; tweak variable types, add year_wk and unique set identifier
djfmp <- DJFMP %>%
transmute(date = mdy(SampleDate),
time = chron(times = SampleTime),
date_time = as_datetime(paste(date, time),
tz = "US/Pacific"),
count = Count,
stage = StageCode,
fl = ForkLength,
race_tag = RaceByTag,
location = as_factor(Location),
region = as_factor(RegionCode),
station = as_factor(StationCode),
weather = as_factor(WeatherCode),
race_fl = as_factor(RaceByLength),
race_tag = as_factor(RaceByTag),
temp = WaterTemp,
volume = Volume,
sp = as_factor(OrganismCode)) %>%
mutate(year_wk = yearweek(date),
setID = paste(date, station))
print(djfmp, width = Inf) # check...
# note that 'year' is based on calendar year; added water year below
## data manipulation ----
# chn totals for sets with chn (no chn=0 sets)
chn_sets <-
djfmp %>%
group_by(year_wk, setID, sp) %>%
summarise(count_by_set = sum(count)) %>%
filter(sp == "CHN")
# unique sets
sets <-
djfmp[, c(3, 17, 4, 16, 8:11)] %>%
distinct(setID, .keep_all = TRUE)
# join all sets w chn set totals
chn1 <-
left_join(sets, chn_sets[,2:4], by = "setID")
# replace count_by_set NAs w '0'
chn1$count_by_set <- replace_na(chn1$count_by_set, 0)
## regularize ####
# convert df of sets (w chn counts) to tsibble (time series)
# compute weekly number of chn and sets; use these to compute weekly cpue
# add water year (Oct-Sep)
chn2 <- tsibble(chn1, index = date_time, key = setID)
chn2 <- chn2 %>%
index_by(year_wk = year_wk) %>%
summarise(chn_wk = sum(count_by_set, na.rm = TRUE),
sets_wk = n_distinct(setID)) %>% # effort = number of sets per week
fill_gaps(chn_wk = NA, sets_wk = NA) %>% # regularize the time series
mutate(cpue = chn_wk/sets_wk, # calc cpue catch/effort
Date = date(year_wk)) # temporary: allows addWaterYear()
chn2 <- chn2 %>% addWaterYear()
## calc wkly cpue ####
# note: 'year' in chn3 refers to water year!
chn3 <-
chn2 %>%
mutate(day1 = Date,
cpue_wk = chn_wk/sets_wk,
year = as_factor(waterYear)) %>% # temp use of 'year'...
select(year, year_wk, day1, chn_wk, sets_wk, cpue_wk)
## add yeartypes ####
yeartypes <- yeartypes %>%
mutate(year = as_factor(Year),
index = Index,
yr_type = as_factor(Yr_type),
drought = as_factor(Drought),
short_term = as_factor(ShortTerm),
sprndoi = SprNDOI,
.keep = "unused")
yeartypes2 <- yeartypes %>%
mutate(water_yr = year)
chn4 <- left_join(as_tibble(chn3), # all rows in y for each matching row in x
yeartypes,
by = "year") %>%
mutate(water_yr = year) # restore proper variable name
chn4 <- chn4 %>%
mutate(year = as_factor(year(day1))) # convert to calendar year (df has both)
## cumulative totals ####
### cpue ####
chn5 <-
chn4 %>%
group_by(water_yr) %>%
mutate(cum_chn = cumsum(replace_na(chn_wk, 0)),
cum_set = cumsum(replace_na(sets_wk, 0)),
wk_cpue = replace_na(cum_chn/cum_set, 0),
wy_wk = seq(1:n()),
std_chn = replace_na(cum_chn/last(cum_chn)),
cum_cpue = replace_na(cum_chn/last(cum_set)),
std_cpue = replace_na(cum_chn/last(cum_set)/max(cum_cpue)))
# cum_chn & cum_sets now continuous through all weeks for each water year
# is the date for each year's max cum_cpue a reflection of env conditions?
# how about the magnitude of each year's max cum_cpue??
# rearrange variables & correct some numbers...
chn5 <- chn5[,c(1, 12, 2, 16, 3:6, 13:15, 17:19, 7:11)]
chn5[1:30,4] <- c(23:52) # assign correct water-year week number to first records
# re-order factors for plotting
chn5$yr_type <-
factor(chn5$yr_type,
levels = c("Critical", "Dry", "Below Normal", "Above Normal", "Wet"))
# preserve backup object
chn <- chn5
write_csv(chn5, "chn5_backup.csv")
## annual totals #####
# for both chn and sets
chn_ann1 <-
chn %>%
group_by(water_yr) %>%
summarise(chn_ann = max(cum_chn),
set_ann = max(cum_set),
cpue_ann = last(wk_cpue),
max_cpue = max(wk_cpue))
chn_ann <-
left_join(chn_ann1, yeartypes2, by = "water_yr", .keep = "none")
chn_ann$yr_type <-
factor(chn_ann$yr_type,
levels = c("Critical", "Dry", "Below Normal", "Above Normal", "Wet"))
chn_ann_backup <- chn_ann
write_csv(chn_ann_backup, "chn_ann_backup.csv")
# figures ####
## time series ####
p <- ggplot(chn %>%
filter(day1 < "2021-01-01", # 2021 doesn't have year type...
!is.na(cpue_wk)),
aes(x = year_wk, y = cpue_wk)) +
geom_line(color = "steelblue") +
geom_point() +
theme_bw() +
xlab("") +
ylab("CPUE (# CHN/set each week)") +
scale_color_discrete(name = "year type",
labels = c("dry", "neutral", "wet"))
p + geom_point(aes(color = factor(drought)))
## yearly accumulations ####
### raw counts ####
# not controlled for effort
ggplot(chn) +
aes(x = wy_wk, y = cum_chn, group = water_yr) +
xlim(0, 52) + # there are 53 weeks in some years!
geom_line(aes(color = drought), size = 1.3) +
xlab("Water-Year Week (Oct - Sep)") +
ylab("Cumulative Catch (raw counts, CHN)") +
theme_bw() +
scale_color_manual(values = c("#F8766D", "#7CAE00", "#00BFC4"),
name = "year type",
labels = c("Dry", "Neutral", "Wet", "no data"))
# there are no missing values in wy_wk or cum_chn; don't know why the warning
### std chn ####
# accumulated counts of chn standardized by the total chn for each year
ggplot(chn) +
aes(x = wy_wk, y = std_chn, group = water_yr) +
xlim(0, 52) + # there are 53 weeks in some years!
geom_line(aes(color = drought), size = 1.3) +
xlab("Water-Year Week (Oct - Sep)") +
ylab("Standardized Cumulative CHN (# weekly CHN/total CHN)") +
theme_bw() +
scale_color_manual(values = c("#F8766D", "#7CAE00", "#00BFC4"),
name = "year type",
labels = c("Dry", "Neutral", "Wet", "no data"))
### cum cpue ####
# weekly CHN counts relative to total annual effort (# of sets)
ggplot(chn) +
aes(x = wy_wk, y = cum_cpue, group = water_yr) +
xlim(0, 52) + # there are 53 weeks in some years!
geom_line(aes(color = drought), size = 1.3) +
xlab("Water-Year Week (Oct - Sep)") +
ylab("Cumulative CPUE (# CHN/total sets)") +
theme_bw() +
scale_color_manual(values = c("#F8766D", "#7CAE00", "#00BFC4"),
name = "year type",
labels = c("Dry", "Neutral", "Wet", "no data"))
# there are no missing values in wy_wk or cum_cpue; don't know why the warning
### std cpue #####
ggplot(chn) +
aes(x = wy_wk, y = std_cpue, group = water_yr) +
xlim(0, 52) + # there are 53 weeks in some years!
geom_line(aes(color = drought), size = 1.3) +
xlab("Water-Year Week (Oct - Sep)") +
ylab("Standardized Cumulative CPUE (weekly CPUE/max CPUE)") +
theme_bw() +
scale_color_manual(values = c("#F8766D", "#7CAE00", "#00BFC4"),
name = "year type",
labels = c("Dry", "Neutral", "Wet", "no data"))
#############################################################################
#What about the trawl data?
Trawl1 = read_csv("data/1976-2001_DJFMP_trawl_fish_and_water_quality_data.csv")
Trawl2 = read_csv("data/2002-2021_DJFMP_trawl_fish_and_water_quality_data.csv")
Trawls = bind_rows(Trawl1, Trawl2)%>%
select(Location, StationCode, SampleDate, MethodCode, TowNumber, TowDuration, Volume, IEPFishCode, Count)
TrawlsCH = pivot_wider(Trawls, names_from = IEPFishCode, values_from = Count, values_fn = sum, values_fill = 0) %>%
select(Location, StationCode, SampleDate, MethodCode, TowNumber, TowDuration, Volume, CHISAL) %>%
mutate(year_wk = yearweek(SampleDate),
setID = paste(SampleDate, StationCode),
CPUE = CHISAL/Volume)
#calculate cummulative CPUE for Chipps
Chipps = filter(TrawlsCH, Location == "Chipps Island") %>%
arrange(SampleDate) %>%
mutate(Year = year(SampleDate), julian = yday(SampleDate), wyjulian = case_when(
julian > 274 ~ julian -274,
TRUE ~ julian +91
))%>%
rename(Date = SampleDate) %>%
addWaterYear() %>%
group_by(waterYear) %>%
mutate(cumcatch = cumsum(CHISAL), cumcpue = cumsum(CPUE),
percentcatch = cumcatch/max(cumcatch, na.rm = T))
ggplot(Chipps, aes(x=wyjulian, y = cumcatch, group = waterYear)) + geom_line()+
coord_cartesian(xlim = c(0,320))
ggplot(Chipps, aes(x=wyjulian, y = percentcatch, group = waterYear, color = as.factor(waterYear))) + geom_line()+
coord_cartesian(xlim = c(0,320))+
ylab("Percent of Annual Chinook Salmon Catch")+
xlab("day of water year")
Chipps50 = Chipps %>%
filter(percentcatch >= 0.5) %>%
group_by(waterYear) %>%
summarise(a50_day = min(wyjulian))
chipdate2 = left_join(Chipps50, WYs, by = c("waterYear" = "Year"))
ggplot(chipdate2, aes(x=Drought, y = a50_day)) + geom_boxplot()
ggplot(chipdate2, aes(x=Yr_type, y = a50_day)) + geom_boxplot()
ggplot(chipdate2, aes(x= Index, y = a50_day)) + geom_point() + geom_smooth(method = "lm")
ggplot(filter(Chipps, waterYear>2013), aes(x=wyjulian, y = percentcatch, group = waterYear, color = as.factor(waterYear))) + geom_line()+
coord_cartesian(xlim = c(0,320))+
ylab("Percent of Annual Chinook Salmon Catch")+
xlab("day of water year")
####################################################
#sherwood harbor
#calculate cummulative CPUE for Chipps
swh = filter(TrawlsCH, Location == "Sherwood Harbor") %>%
arrange(SampleDate) %>%
mutate(Year = year(SampleDate), julian = yday(SampleDate), wyjulian = case_when(
julian > 274 ~ julian -274,
TRUE ~ julian +91
))%>%
rename(Date = SampleDate) %>%
addWaterYear() %>%
group_by(waterYear) %>%
mutate(cumcatch = cumsum(CHISAL), cumcpue = cumsum(CPUE),
percentcatch = cumcatch/max(cumcatch, na.rm = T))
ggplot(swh, aes(x=wyjulian, y = cumcatch, group = waterYear)) + geom_line()+
coord_cartesian(xlim = c(0,320))
ggplot(swh, aes(x=wyjulian, y = percentcatch, group = waterYear, color = as.factor(waterYear))) + geom_line()+
coord_cartesian(xlim = c(0,320))+
ylab("Percent of Annual Chinook Salmon Catch")+
xlab("day of water year")
ggplot(filter(swh, waterYear>2013), aes(x=wyjulian, y = percentcatch, group = waterYear, color = as.factor(waterYear))) + geom_line()+
coord_cartesian(xlim = c(0,320))+
ylab("Percent of Annual Chinook Salmon Catch")+
xlab("day of water year")
Test = data.frame(Julian = 1:365, wyJulian = NA)
Test = mutate(Test, wyjulian = case_when(
Julian > 275 ~ Julian -273,
TRUE ~ Julian +90
))