-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfield_nest_comparison.R
91 lines (71 loc) · 3.93 KB
/
field_nest_comparison.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
library(dplyr)
library(ggplot2)
calc_recall = function(data, detect_col, ref_col, detect_type){
n_refnests = sum(data[[ref_col]] == "T") # Either field nests or human detects
n_Udetects = sum(data[[detect_col]] == "U") # number of Uncertains
test_detections = sum(data[[detect_col]] == "T") # Nest detections
test_recall = test_detections/(n_refnests - n_Udetects) # recall where U excluded
test_recall_wU = test_detections/(n_refnests) # recall with U counted as a fail
results = data.frame(detect_type, n_refnests, n_Udetects, test_detections, test_recall, test_recall_wU)
return(results)
}
# DATA PROCESSING
## Load observer and algorithm observations and field nest records
obs_data = read.csv("exp-field-nest-eval_072423.csv")
field_data = read.csv("field_nest_sample_locations.csv")
## Filter field data records
### field data includes 2021 data (our first attempt at flagging nests that revealed how difficult it was)
### and a colony without ground control points
field_data = field_data |> filter(year == 2022)
field_data = field_data |> filter(site != "CypressCity")
## Create unique nest_ids for each record
obs_data$nest_id = paste(obs_data$year,"_",obs_data$colony,
"_",obs_data$sample_id, sep="")
obs_data = obs_data %>% filter(colony != "Joule")
field_data$nest_id = paste(field_data$year,"_",field_data$site,
"_",field_data$sample_id, sep="")
## Join field and observer files on unique nest id
merge_obs = inner_join(obs_data, field_data)
# Filter data to keep only real nests and remove uncertain human assessments
## Also changed real_nest = "yes" to T for consistency.
field_nests = merge_obs %>% filter(real_nest == "yes") |>
mutate(real_nest = "T") |>
filter(human_detect != "U")
# Generate raw sample sizes
field_bycolony = field_data |> count(site)
obs_bycolony = obs_data |> count(colony)
# Calculate Recall
## Human observations
recall_results = calc_recall(field_nests,
"human_detect",
"real_nest",
"human-real")
## Calculate and add algorithm results
recall_results = rbind(recall_results, calc_recall(field_nests,
"AI_nest",
"real_nest",
"computer-real"))
## Calculate and add human vs. algorithm results
recall_results = rbind(recall_results, calc_recall(field_nests,
"AI_nest",
"human_detect",
"computer-human"))
detections_colony = field_nests %>% group_by(colony) %>%
summarise(total=length(year), human_U = sum(human_detect == "U"), human=sum(human_detect=="T"),
computer=sum(AI_nest=="T"))
total_counts = field_nests %>% summarise(colony = "all",
total=sum(real_nest == "T"),
human_U = sum(human_detect == "U"),
human=sum(human_detect=="T"),
computer=sum(AI_nest=="T"))
detections_colony = bind_rows(detections_colony, total_counts)
detections_colony = detections_colony |> mutate(human_recall = human/(total - human_U),
computer_recall = computer/total,
comp_human_compare = computer/human)
total_nests=sum(detections_colony$total)
# Find human and algorithm disagreements for post-hoc assessment
disagreedTF = field_nests |> filter(human_detect =="T" & AI_nest == "FALSE")
disagreedFT = field_nests |> filter(human_detect =="F" & AI_nest == "TRUE")
uncertain = field_nests |> filter(human_detect =="U")
# write combined records for all ground monitored nests
write.csv(field_nests, "field__nest_records.csv")