-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDefiningTracks.R
More file actions
513 lines (447 loc) · 18.6 KB
/
DefiningTracks.R
File metadata and controls
513 lines (447 loc) · 18.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
#################################################################
# Script developed by Jen Cruz to clean and format location data #
# We also convert cleaned location data to tracks using atm package #
# We rely heavily on amt getting started vignette here: #
# https://cran.r-project.org/web/packages/amt/vignettes/p1_getting_started.html#
# #
# Data are Prairie Falcon locations collected during 2021 at #
# Morley Nelson Birds of Prey NCA. #
# Data were collected for multiple individuals and at #
# different frequencies including 2 sec intervals when the individuals#
# were moving (every 2-3 days), and 30min fixes otherwise. #
# Frequency shifted to hourly once individuals left their breeding grounds. #
#################################################################
################## Prep. workspace ###############################
# Install packages from CRAN only if they are missing. #
# Unconditional install.packages() calls re-download packages on every #
# run and can fail in non-interactive sessions, so each install is #
# guarded with requireNamespace(). #
for( pkg in c( "tidyverse", "amt", "sf" ) ){
  if( !requireNamespace( pkg, quietly = TRUE ) ){
    install.packages( pkg )
  }
}
#trying to install amt directly from github
# install.packages( "devtools" )
# devtools::install_github("jmsigner/amt")
# load packages relevant to this script:
library( tidyverse ) #easy data manipulation and plotting
# set option to see all columns and more than 10 rows
options( dplyr.width = Inf, dplyr.print_min = 100 )
library( amt ) #creating tracks from location data
library( sf ) #handling spatial data
## end of package load ###############
###################################################################
#### Load or create data -----------------------------------------
# Clean your workspace to reset your R environment. #
# NOTE(review): rm(list = ls()) removes objects but not loaded packages
# or options; restarting the R session is a more complete reset.
rm( list = ls() )
# Set working directory. This is the path to your Rstudio folder for this
# project. If you are in your correct Rstudio project then it should be:
getwd()
# set path to where you can access your data #
# Note that the path will be different for you. #
#datapath <- "Z:/Common/PrairieFalcons/"
datapath <- "Data/"
#import GPS data#
# Fixes are stored as separate CSV files for each individual
## We therefore create a function that imports multiple files at once:
# Import all per-individual GPS fix files from one folder.
#
# Arguments:
#   path - folder containing one CSV file of fixes per individual.
# Returns: a single data frame with the rows of all files stacked.
load_data <- function( path ){
  # extract all file names in the folder; anchor the pattern with '$'
  # so only files that END in .csv match (not e.g. "x.csv.bak")
  myfiles <- dir( path, pattern = '\\.csv$', full.names = TRUE )
  # fail loudly on an empty folder instead of returning nothing
  if( length( myfiles ) == 0 ){
    stop( "No .csv files found in: ", path, call. = FALSE )
  }
  # read every file into a list; the serial column is forced to
  # character so long numeric IDs are not mangled on import
  filelist <- lapply( myfiles, function( f ){
    read.csv( file = f,
              #remove white spaces
              strip.white = TRUE,
              #include column headers
              header = TRUE,
              # read the serial column as a character instead of number:
              colClasses = c( "serial" = "character" ) )
  } )
  # combine all individuals in one call rather than growing a data
  # frame inside a loop (the original also used ifelse() for scalar
  # control flow with assignment side effects, which is fragile)
  df <- dplyr::bind_rows( filelist )
  #return df for all individuals
  return( df )
}
#apply function to import all files as list of databases:
dataraw <- load_data( paste0(datapath, '2021/') )
#Note that the files are all in a subdirectory
head(dataraw)
# Import trapping records with details of when radiotrackers were
# fitted to the individuals
records <- read.csv( file = paste0( datapath,"survey_0.csv" ),
                     # treat empty strings and blanks as missing values (NA)
                     na.strings = c(""," ","NA"),
                     # include column headings
                     header = TRUE )
#check
head( records ); dim( records )
#import polygon of the NCA as sf spatial file:
NCA_Shape <- sf::st_read( paste0( datapath, "BOPNCA_Boundary.shp") )
##############
##############
#######################################################################
######## cleaning data ###############################################
# Data cleaning is crucial for accurate analysis #
# Trapping records provide info on when individuals were fitted #
# with transmitters.#
colnames( records )
# we keep transmitter id, date and sex
records <- records %>% dplyr::select( Telemetry.Unit.ID, Sex, x, y,
Date.and.Time )
#view
records
#convert date to correct format using lubridate
records$StartDate <- lubridate::mdy_hms( records$Date.and.Time,
tz = "MST" )
# Add a day so that we can ignore records from the trapping day #
# and start only with those from the following day:
records$StartDate <- records$StartDate + lubridate::days(1)
#convert to day of year
records$StartDay <- lubridate::yday( records$StartDate )
#unit IDs were missing starting number of their serial number #
# we append those so we can match it to the GPS serial IDs:
records$serial <- paste0( '894608001201',records$Telemetry.Unit.ID )
#check
head( records); dim( records)
# Using serial ID unique to each individual found in df, add territory column
# In Territory column each serial ID is linked to its corresponding territory
records <- records %>%
mutate(territory = case_when(
endsWith(serial, "47221") ~ "SG",
endsWith(serial, "47775") ~ "CRW",
endsWith(serial, "47072") ~ "UHD",
endsWith(serial, "47874") ~ "SDTP",
endsWith(serial, "48120") ~ "PR_II",
endsWith(serial, "46751") ~ "HHGS_DS",
endsWith(serial, "46983") ~ "HHGS_US",
endsWith(serial, "47197") ~ "Mac",
endsWith(serial, "48229") ~ "CRW_new",
endsWith(serial, "48377") ~ "CFR",
))
unique(records$territory)
head(records)
#keep version with coordinates to turn into shapefile later
nestcords <- records
#remove the coordinates from records
records <- records %>%
dplyr::select( -x, -y )
###################################################################
# Clean GPS data
# GPS units often provide information on the quality of the fixes they #
# obtained.#
# The units from Cellular track technologies provide HDOP, VDOP and #
# time to fix information #
# Start by viewing what those look like in the dataset #
hist( dataraw$vdop, breaks = 50 )
hist( dataraw$hdop, breaks = 50 )
hist( dataraw$time_to_fix, main = "Time to fix" )
# Remove 2D fixes and fixes where HDOP or VDOP ≥10 following #
# D’eon and Delparte (2005).#
# Also those where time to fix > 20min or with 0 satellites:
#start by creating a new dataframe to store cleaned location records:
datadf <- dataraw
#which columns do we have?
colnames( datadf )
# Filter to remove inaccurate locations
datadf <- datadf %>% dplyr::filter( hdop < 10 ) %>%
  dplyr::filter( vdop < 10 ) %>%
  dplyr::filter( time_to_fix <= 20 ) %>%
  dplyr::filter( nsats > 0 ) %>%
  # lat > 0 also drops failed fixes stored with zero coordinates
  dplyr::filter( lat > 0 ) %>%
  #remove superfluous columns
  dplyr::select( -inactivity, -geo, -data_voltage, -solar_current,
                 -solar_charge )
#view
head( datadf ); dim( datadf )
#How many rows did we remove?
# Answer:
#
# dim() returns c(rows, columns), so this subtracts elementwise
dim( dataraw ) - dim( datadf )
# What % of data did we loose?
# Answer:
#
# We also need to set a time column containing date and time information #
# in POSIX format (as required by amt)#
# We rely on lubridate for this. If you haven't used lubridate before #
# go here: https://cran.r-project.org/web/packages/lubridate/vignettes/lubridate.html
# to learn more about how to easily manipulate time and dates in R #
# Data are stored in year, month, day, hour, minute, second format in our data.
# We define correct format with lubridate: parse as UTC first...
datadf$date <- lubridate::ymd_hms( datadf$GPS_YYYY.MM.DD_HH.MM.SS,
                                   tz = "UTC" )
# ...then shift the timestamps to local Mountain Standard Time
datadf$date <- lubridate::with_tz( datadf$date, tz = "MST" )
# and create new column where we convert it to posixct
datadf$ts <- as.POSIXct( datadf$date )
#view
head( datadf ); dim( datadf )
# # check if any data are missing
all( complete.cases( datadf ) )
# # none so we can move on
# we also add month, week, and day of year information using lubridate
datadf <- datadf %>%
  dplyr::mutate( mth = lubridate::month(date),
                 wk = lubridate::week(date),
                 jday = lubridate::yday(date) )
# We need to remove records for fixes that were recorded before the #
# units were fitted to the animals so we append relevant information #
# from the records dataframe. We do that by combining datadf to records df#
# right_join keeps every GPS row and attaches territory, Sex and StartDay
datadf <- records %>% dplyr::select( serial, territory, Sex, StartDay ) %>%
  right_join( datadf, by = "serial" )
#view
head( datadf ); dim( datadf )
# Then using StartDay to filter records, removing those that occurred#
# earlier when unit was turned on, but not fitted to animal #
# NOTE(review): StartDay was already shifted one day past trapping day;
# jday > StartDay therefore also drops that first post-trapping day —
# confirm whether jday >= StartDay was intended.
datadf <- datadf %>%
  group_by( serial ) %>%
  dplyr::filter( jday > StartDay ) %>% ungroup()
#view
head( datadf ); dim( datadf )
# serial IDs are cumbersome so we create a new individual ID column:
# cur_group_id() assigns consecutive small integers (1, 2, ...) per serial
datadf <- datadf %>%
  group_by( serial ) %>%
  mutate( id = cur_group_id()) %>%
  ungroup()
##################################################################
### Define coordinate system and projection for the data ######
# location data were recorded using WGS84 in lat long #
# We use the epsg code to define coordinate system for our sites #
# How? Google epsg WGS84 # First result should take you here: #
# https://spatialreference.org/ref/epsg/wgs-84/
# where you can find that epgs = 4326 for this coordinate system #
# If you are not familiar with geographic data, vector, rasters & #
# coordinate systems go here:
# https://bookdown.org/robinlovelace/geocompr/spatial-class.html #
# to learn more. #
# For amt, crs need to be provided using sp package so:
crsdata <- 4326 #EPSG code for WGS84 lat/long
# We also want to transform the lat longs to easting and northings #
# using UTM. For this we need to know what zone we are in. Go: #
# http://www.dmap.co.uk/utmworld.htm
# We are in zone 11.
# Alternatively we can use the crs of the polygon of our study area, which
# is already in eastings and northings. Check that is the case for your own data
sf::st_crs( NCA_Shape )
#extract crs value for the study area (in easting northings)
crstracks <- sf::st_crs( NCA_Shape )
#create shapefile for nest locations which we will use in later analysis
records_sf <- st_as_sf( nestcords, coords = c("x", "y"), crs =crsdata )
# Note that here we HAVE to give it the crs that it was collected in #
# in this case the wgs84 in lat longs
# Reproject using the crs of the NCA shapefile :
records_trans <- sf::st_transform( records_sf, st_crs( NCA_Shape ) )
######################################################################
######## Visual checks of raw locations ################################
#######################################################################
#Check sample size: fixes per individual (rows) per week (columns) #
table( datadf$id, datadf$wk )
# How many individuals have we dropped so far?
# ANSWER:
#
# Not all transmitters work according to our expectations.
# We can get an idea of the data collected for each individual
# by plotting histograms
#sampling duration: count of fixes by day of year, one panel per bird
ggplot( datadf, aes( x = jday, group = id ) ) +
  theme_classic( base_size = 15 ) +
  geom_histogram( ) +
  facet_wrap( ~ id )
# What do these histograms tell you about the nature of the data #
# Sample size, intensity for different individuals? #
# ANSWER:
#
#
#speeds traveled (speed column as reported by the GPS units)
ggplot( datadf, aes( x = speed, group = id ) ) +
  theme_classic( base_size = 15 ) +
  geom_histogram( ) +
  facet_wrap( ~ id )
#Why is the first bar on the speed histograms so tall?
#ANSWER:
#
# do we need to remove data based on these?
#ANSWER:
#
################
#######################################################################
###### Creating tracks, calculating step lengths and turning angles ###
#### for all individuals at the same time: #####
########################################################################
#amt requires us to turn data into tracks for further analyses.
trks <- datadf %>%
  #make track. Note you can add additional columns to it
  amt::make_track( .y = lat, .x = lon, .t = ts,
                   #define columns that you want to keep, relabel if you need:
                   id = id, territory = territory,
                   sex = Sex, mth = mth, wk = wk,
                   jday = jday, speed = speed, alt = alt,
                   #assign correct crs in lat longs (WGS84)
                   crs = crsdata )
# remember you need to give it the original CRS first!!!!!
#check
head(trks)
# Reproject to UTM to convert lat lon to easting northing
#because it is an amt object now, we use an amt function:
trks <- amt::transform_coords( trks, crs_to = crstracks )
#note that we are still using the study area crs
#check
head(trks)
#Turn into a tibble list by grouping and nesting by individual IDs so that
# we can use the map function for faster processing (one row per bird,
# with that bird's track stored in the list-column `data`)
trks.tib <- trks %>% amt::nest( data = -"id" )
#view
trks; trks.tib
# Remember we have multiple types of data including detailed data for flights #
# 3 times a week, 30min fixes during the day, then hourly fixes during #
# migration. We start by focusing on data during breeding season. #
# That means we need to remove migration locations.
# How do we know when individuals started migrating North?
# We plot overall paths for each individual:
# (seq_len(nrow(x)) is the safe loop index: unlike 1:dim(x)[1] it
# yields an empty sequence when the tibble has zero rows)
for( i in seq_len( nrow(trks.tib) ) ){
  a <- as_sf_points( trks.tib$data[[i]] ) %>%
    ggplot(.) + theme_bw(base_size = 17) +
    labs( title = paste0('individual =', trks.tib$id[i]) ) +
    geom_sf(data = NCA_Shape, inherit.aes = FALSE ) +
    geom_sf()
  # print() is required for ggplot objects created inside a loop
  print(a)
}
# Which ones have migration paths?
# Answer:
#
# Any ideas on how to remove migration data?
# Answer:
#
# Here we rely on NCA polygon, removing records that exist East of the #
# NCA. We can extract the extent of a polygon:
sf::st_bbox( NCA_Shape )
#Then use the Eastern-most coordinate to filter out data
xmax <- as.numeric(st_bbox(NCA_Shape)$xmax) #627081.5
#Then use the Northern-most coordinate (plus a 10 km buffer) to filter out data
ymax <- as.numeric(st_bbox(NCA_Shape)$ymax) + 10000
#subset those tracks less than as breeding and those > as migrating:
# map() applies the filter to each individual's nested track data frame
trks.tib <- trks.tib %>% mutate(
  breeding = map( data, ~ filter(., x_ < xmax ) ) )
trks.tib <- trks.tib %>% mutate(
  breeding = map( breeding, ~ filter(., y_ < ymax ) ) )
#some individuals come back to overwinter at the NCA and so #
# we need to remove those records as well #
# we do so using month column to remove anything after June
trks.tib <- trks.tib %>% mutate(
  breeding = map( breeding, ~filter(., mth < 7 ) ),
  migrating = map( data, ~filter(., mth > 6 ) ),
  # locals = post-June fixes that remain within the NCA extent
  # (i.e. birds overwintering locally rather than migrating away)
  locals = map( migrating, ~ filter(., x_ < xmax ) ),
  locals = map(locals, ~ filter(., y_ < ymax ) )
)
#check
trks.tib
# We focus on breeding season data for visualization as that is the
# one of interest in latest weeks.
# BUG FIX: the title previously indexed trks$id[i] — row i of the
# un-nested track, which is NOT the i-th individual (it mislabels every
# panel with the first bird's id). The nested tibble trks.tib$id[i]
# is the correct one-row-per-individual index, matching the loop above.
for( i in seq_len( nrow(trks.tib) ) ){
  a <- as_sf_points( trks.tib$breeding[[i]] ) %>%
    ggplot(.) + theme_bw(base_size = 17) +
    labs( title = paste0('individual =', trks.tib$id[i]) ) +
    geom_sf(data = NCA_Shape, inherit.aes = FALSE ) +
    geom_sf()
  print(a)
}
# For homework plot the locals instead.#what do you note? are they all overwintering
# at the NCA? which ones are? List individuals here:
#ANSWER:
#
# Despite us setting a sampling (fix) rate for our transmitters, bad weather,
# thick canopy etc can cause fix attempts to fail. Our fix rate may therefore
# not be exactly what we set it for. If we want measures of distance (step lengths),
# or we want to use this data for AKDE, SSFs, iSSFs or HMMs (movement models) in discrete
# time, we need fix rates to be equally spaced. The first step to do this is to
# estimate sampling rate for each individual by looping through
# data using purrr function map
sumtrks <- trks.tib %>% summarise(
  map( breeding, amt::summarize_sampling_rate ) )
#view
sumtrks[[1]]
# This shows the actual sampling rate for each individual separately. #
# Look at the median. What is it? (units reported on last column)
# ANSWER:
#
# What about min and max? Those values give you an idea of the breaks in
# your data. The median allows you to work out the most common fix rate.
# Probably the one of interest for a lot of questions. #
# we will choose two sampling rates 30min (median value) for Range analysis #
# and RRFs and 5sec for movement questions, SSFs, iSSFs.
trks.all <- trks.tib %>% mutate(
  # Here we take breeding season data and resample at 5 seconds, allowing +- 4sec:
  highres = map( breeding, function(x) x %>%
                   track_resample( rate = seconds(5),
                                   tolerance = seconds(4) ) ),
  #Now repeat the process but with 30 min sampling rate (+- 5 min tolerance)
  red = map(breeding, function( x ) x %>%
              track_resample( rate = minutes(30),
                              tolerance = minutes(5) ) ) )
#view
trks.all
#note that this creates new list-columns (highres, red) that use
# the breeding season data
#Now unnest the dataframes of interest
#Pull out the 5sec breeding season data
trks.breed <- trks.all %>% dplyr::select( id, highres ) %>%
  unnest( cols = highres )
tail( trks.breed )
#you can also plot them once you unnested the resampled points for fine tune cleaning
trks.breed %>%
  #dplyr::filter( jday < ? ) %>%
  as_sf_points( . ) %>%
  #plot with ggplot
  ggplot( . ) +
  theme_bw( base_size = 10 ) +
  geom_sf( aes( colour = as.factor(jday) ) ) +
  geom_sf( data = NCA_Shape, fill = NA ) +
  #plot separate for each individual
  facet_wrap( ~id )
#remove remaining migrating tracks for those individuals that have them
# (jday 178 is late June; cutoff chosen from the plots above)
trks.breed <- trks.breed %>%
  dplyr::filter( jday < 178 )
#check
tail( trks.breed )
#replot to check that it worked
trks.breed %>%
  as_sf_points( . ) %>%
  #plot with ggplot
  ggplot( . ) +
  theme_bw( base_size = 10 ) +
  geom_sf( aes( colour = as.factor(jday) ) ) +
  geom_sf( data = NCA_Shape, fill = NA ) +
  #plot separate for each individual
  facet_wrap( ~id )
# Now for 30min intervals, applying the same jday cutoff
trks.thin <- trks.all %>% dplyr::select( id, red ) %>%
  unnest( cols = red ) %>% dplyr::filter( jday < 178 )
tail( trks.thin )
#Also migration data:
trks.mig <- trks.all %>% dplyr::select( id, migrating ) %>%
  unnest( cols = migrating )
head( trks.mig )
#overwintering birds
trks.locals <- trks.all %>% dplyr::select( id, locals ) %>%
  unnest( cols = locals )
head( trks.locals)
### repeat process of plotting tracks for overwintering birds!
## do you need to remove additional days? Do so if so.
#Answer:
#Plot locals here:
#
## Save updated version of locals here:
#
#############################################################################
# Saving relevant objects and data ---------------------------------
# write_rds() stores a single R object to disk; reload with read_rds()
#save breeding season data (not thinned)
write_rds( trks.breed, "Data/trks.breed")
#save breeding season data (thinned)
write_rds( trks.thin, "Data/trks.thin" )
#save migration data (unthinned)
write_rds( trks.mig, "Data/trks.mig" )
### save locals data too
write_rds( trks.locals, "Data/trks.locals" )
#save shapefile of nest locations
sf::st_write( records_trans, "Data/nests.shp",
              driver = "ESRI Shapefile",
              #overwrite existing layer if present
              delete_layer = TRUE )
#save workspace in case we need to make changes
save.image( "TracksWorkspace.RData" )
########## end of save #########################
############### END OF SCRIPT ########################################