Skip to content

Commit a4b3589

Browse files
Add check_raising_factor_inspector function
Add check_raising_factor_inspector function
1 parent 94e1382 commit a4b3589

File tree

5 files changed

+380
-2
lines changed

5 files changed

+380
-2
lines changed

DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: codama
22
Title: Common prOcesses for Data quAlity Monitoring
3-
Version: 1.0.0
3+
Version: 1.1.0
44
Authors@R:
55
c(person(given = "Oceane",
66
family = "Bouhineau",
@@ -55,7 +55,8 @@ Imports:
5555
ggplot2 (>= 3.4.4),
5656
reshape (>= 0.8.9),
5757
stats,
58-
utils (>= 4.2.2)
58+
utils (>= 4.2.2),
59+
purrr (>= 1.0.2)
5960
Remotes:
6061
OB7-IRD/furdeb
6162
Suggests:

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
export("%>%")
44
export(all_species_control)
5+
export(check_raising_factor_inspector)
56
export(check_species_catch_ocean)
67
export(fate_code_9_control)
78
export(file_path_checking)

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# codama 1.1.0
2+
* Add check_raising_factor_inspector function
3+
14
# codama 1.0.0
25

36
<div style="text-align: justify">

R/check_raising_factor_inspector.R

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
#' @name check_raising_factor_inspector
2+
#' @title Gives the inconsistencies between RF1 and threshold values
3+
#' @description The purpose of the check_raising_factor_inspector function is to provide a table of data that contains an inconsistency with the RF1 and the valid threshold (Default : 0.9 ; 1.1)
4+
#' @param dataframe1 {\link[base]{data.frame}} expected. Csv or output of the function {\link[furdeb]{data_extraction}}, which must be done before using the check_raising_factor_inspector () function.
5+
#' @param dataframe2 {\link[base]{data.frame}} expected. Csv or output of the function {\link[furdeb]{data_extraction}}, which must be done before using the check_raising_factor_inspector () function.
6+
#' @param dataframe3 {\link[base]{data.frame}} expected. Csv or output of the function {\link[furdeb]{data_extraction}}, which must be done before using the check_raising_factor_inspector () function.
7+
#' @param dataframe4 {\link[base]{data.frame}} expected. Csv or output of the function {\link[furdeb]{data_extraction}}, which must be done before using the check_raising_factor_inspector () function.
8+
#' @param output {\link[base]{character}} expected. Kind of expected output. You can choose between "message", "report" or "logical".
9+
#' @param country_species {\link[base]{character}} expected. Default values: list("1" = c("TUN", "ALB", "YFT", "BET", "SKJ"), "4" = c("LOT", "TUN", "ALB", "YFT", "BET", "SKJ", "LTA", "FRI", "BLF", "RAV*", "KAW", "FRZ", "BLT")). list of the inventory of species (FAO code) used to calculate catch weight in RF1 by country (country code).
10+
#' @param speciesfate {\link[base]{character}} expected. Default values: "6". Vector of inventory of fate used to calculate catch weight in RF1.
11+
#' @param vesselactivity {\link[base]{character}} expected. Default values: c("23", "25", "27", "29"). Vector of inventory of vessel activity NOT used to calculate catch weight in RF1.
12+
#' @param threshold {\link[base]{numeric}} expected. Default values: 0.9 and 1.1. Vector containing the lower and upper acceptable threshold for RF1.
13+
#' @return The function returns a {\link[base]{character}} with output is "message", a {\link[base]{data.frame}} with output is "report", a {\link[base]{logical}} with output is "logical"
14+
#' @details
15+
#' The input dataframe must contain all these columns for the function to work :
16+
#' \itemize{
17+
#' Dataframe 1:
18+
#' \item{\code{ trip_id}}
19+
#' Dataframe 2:
20+
#' \item{\code{ catch_id}}
21+
#' \item{\code{ catch_weight}}
22+
#' \item{\code{ speciesfate_code}}
23+
#' \item{\code{ species_fao_code}}
24+
#' \item{\code{ vesselactivity_code}}
25+
#' \item{\code{ trip_id}}
26+
#' Dataframe 3:
27+
#' \item{\code{ landing_id}}
28+
#' \item{\code{ landing_weight}}
29+
#' \item{\code{ species_fao_code}}
30+
#' \item{\code{ trip_id}}
31+
#' Dataframe 4:
32+
#' \item{\code{ trip_id}}
33+
#' \item{\code{ trip_end_full_trip_id}}
34+
#' \item{\code{ vessel_id}}
35+
#' \item{\code{ country_fleetcountry}}
36+
#' }
37+
#' @export
38+
check_raising_factor_inspector <- function(dataframe1,
39+
dataframe2,
40+
dataframe3,
41+
dataframe4,
42+
output,
43+
country_species = list("1" = c("TUN", "ALB", "YFT", "BET", "SKJ"), "4" = c("LOT", "TUN", "ALB", "YFT", "BET", "SKJ", "LTA", "FRI", "BLF", "RAV*", "KAW", "FRZ", "BLT")),
44+
speciesfate = "6",
45+
vesselactivity = c("23", "25", "27", "29"),
46+
threshold = c(0.9, 1.1)) {
47+
# 0 - Global variables assignement ----
48+
trip_id <- NULL
49+
catch_weight <- NULL
50+
sum_catch_weight <- NULL
51+
rf1 <- NULL
52+
full_trip_sum_landing_weight <- NULL
53+
full_trip_sum_catch_weight <- NULL
54+
lower_threshold <- NULL
55+
upper_threshold <- NULL
56+
speciesfate_code <- NULL
57+
vesselactivity_code <- NULL
58+
country_fleetcountry <- NULL
59+
landing_weight <- NULL
60+
sum_landing_weight <- NULL
61+
trip_end_full_trip_id <- NULL
62+
vessel_id <- NULL
63+
logical_full_trip <- NULL
64+
# 1 - Arguments verification ----
65+
if (!codama::r_table_checking(
66+
r_table = dataframe1,
67+
type = "data.frame",
68+
column_name = c("trip_id"),
69+
column_type = c("character"),
70+
output = "logical"
71+
)) {
72+
codama::r_table_checking(
73+
r_table = dataframe1,
74+
type = "data.frame",
75+
column_name = c("trip_id"),
76+
column_type = c("character"),
77+
output = "message"
78+
)
79+
} else {
80+
dataframe1 <- dataframe1[, c("trip_id")]
81+
}
82+
if (!codama::r_table_checking(
83+
r_table = dataframe2,
84+
type = "data.frame",
85+
column_name = c("catch_id", "catch_weight", "speciesfate_code", "species_fao_code", "vesselactivity_code", "trip_id"),
86+
column_type = c("character", "numeric", "character", "character", "character", "character"),
87+
output = "logical"
88+
)) {
89+
codama::r_table_checking(
90+
r_table = dataframe2,
91+
type = "data.frame",
92+
column_name = c("catch_id", "catch_weight", "speciesfate_code", "species_fao_code", "vesselactivity_code", "trip_id"),
93+
column_type = c("character", "numeric", "character", "character", "character", "character"),
94+
output = "message"
95+
)
96+
} else {
97+
dataframe2 <- dataframe2[, c("catch_id", "catch_weight", "speciesfate_code", "species_fao_code", "vesselactivity_code", "trip_id")]
98+
}
99+
if (!codama::r_table_checking(
100+
r_table = dataframe3,
101+
type = "data.frame",
102+
column_name = c("landing_id", "landing_weight", "species_fao_code", "trip_id"),
103+
column_type = c("character", "numeric", "character", "character"),
104+
output = "logical"
105+
)) {
106+
codama::r_table_checking(
107+
r_table = dataframe3,
108+
type = "data.frame",
109+
column_name = c("landing_id", "landing_weight", "species_fao_code", "trip_id"),
110+
column_type = c("character", "numeric", "character", "character"),
111+
output = "message"
112+
)
113+
} else {
114+
dataframe3 <- dataframe3[, c("landing_id", "landing_weight", "species_fao_code", "trip_id")]
115+
}
116+
if (!codama::r_table_checking(
117+
r_table = dataframe4,
118+
type = "data.frame",
119+
column_name = c("trip_id", "trip_end_full_trip_id", "vessel_id", "country_fleetcountry"),
120+
column_type = c("character", "character", "character", "character"),
121+
output = "logical"
122+
)) {
123+
codama::r_table_checking(
124+
r_table = dataframe4,
125+
type = "data.frame",
126+
column_name = c("trip_id", "trip_end_full_trip_id", "vessel_id", "country_fleetcountry"),
127+
column_type = c("character", "character", "character", "character"),
128+
output = "message"
129+
)
130+
} else {
131+
dataframe4 <- dataframe4[, c("trip_id", "trip_end_full_trip_id", "vessel_id", "country_fleetcountry")]
132+
}
133+
# Checks the type and values of output
134+
if (!codama::r_type_checking(
135+
r_object = output,
136+
type = "character",
137+
allowed_value = c("message", "report", "logical"),
138+
output = "logical"
139+
)) {
140+
return(codama::r_type_checking(
141+
r_object = output,
142+
type = "character",
143+
allowed_value = c("message", "report", "logical"),
144+
output = "message"
145+
))
146+
}
147+
# Checks the type of country_species
148+
if (!codama::r_type_checking(
149+
r_object = country_species,
150+
type = "list",
151+
output = "logical"
152+
)) {
153+
return(codama::r_type_checking(
154+
r_object = country_species,
155+
type = "list",
156+
output = "message"
157+
))
158+
}
159+
# Checks the type of speciesfate
160+
if (!codama::r_type_checking(
161+
r_object = speciesfate,
162+
type = "character",
163+
output = "logical"
164+
)) {
165+
return(codama::r_type_checking(
166+
r_object = speciesfate,
167+
type = "character",
168+
output = "message"
169+
))
170+
}
171+
# Checks the type of vesselactivity
172+
if (!codama::r_type_checking(
173+
r_object = vesselactivity,
174+
type = "character",
175+
output = "logical"
176+
)) {
177+
return(codama::r_type_checking(
178+
r_object = vesselactivity,
179+
type = "character",
180+
output = "message"
181+
))
182+
}
183+
# Checks the type of threshold
184+
if (!codama::r_type_checking(
185+
r_object = threshold,
186+
type = "numeric",
187+
length = 2L,
188+
output = "logical"
189+
)) {
190+
return(codama::r_type_checking(
191+
r_object = threshold,
192+
type = "numeric",
193+
length = 2L,
194+
output = "message"
195+
))
196+
}
197+
select <- dataframe1$trip_id
198+
nrow_first <- length(unique(select))
199+
# 2 - Data design ----
200+
# Add country_fleetcountry for catch
201+
dataframe2 <- merge(dataframe2, unique(dataframe4[, c("trip_id", "country_fleetcountry")]), by = "trip_id", all.x = TRUE)
202+
# Catch filtration for RF1
203+
## Selection species when the list is available for the country and selection species
204+
condition <- as.list(as.data.frame(t(data.frame(country = names(country_species), species = I(unname(country_species))))))
205+
dataframe2_select_species <- purrr::map(condition, ~ dataframe2 %>% dplyr::filter((country_fleetcountry %in% .x[[1]] & species_fao_code %in% .x[[2]])))
206+
dataframe2_select_species <- do.call(rbind.data.frame, dataframe2_select_species)
207+
## Selection all species when the list is not available for the country
208+
dataframe2 <- rbind(dataframe2_select_species, dataframe2 %>% dplyr::filter(!(country_fleetcountry %in% names(country_species))))
209+
## Selection species fate
210+
dataframe2 <- dataframe2 %>%
211+
dplyr::filter((speciesfate_code %in% speciesfate))
212+
## Selection vessel activity
213+
dataframe2 <- dataframe2 %>%
214+
dplyr::filter(!(vesselactivity_code %in% vesselactivity))
215+
# Calculation of the sum of weights caught per trip (Management of NA: if known value performs the sum of the values and ignores the NA, if no known value indicates NA)
216+
dataframe2 <- dataframe2 %>%
217+
dplyr::group_by(trip_id) %>%
218+
dplyr::summarise(sum_catch_weight = ifelse(all(is.na(catch_weight)), catch_weight[NA_integer_], sum(catch_weight, na.rm = TRUE)))
219+
# Merge data
220+
dataframe4 <- merge(dataframe4, dataframe2, by.x = "trip_id", by.y = "trip_id", all.x = TRUE)
221+
# Add country_fleetcountry for landing
222+
dataframe3 <- merge(dataframe3, unique(dataframe4[, c("trip_id", "country_fleetcountry")]), by = "trip_id", all.x = TRUE)
223+
# Landing filtration for RF1
224+
## Selection species when the list is available for the country and selection species fate
225+
condition <- as.list(as.data.frame(t(data.frame(country = names(country_species), species = I(unname(country_species))))))
226+
dataframe3_select_species <- purrr::map(condition, ~ dataframe3 %>% dplyr::filter((country_fleetcountry %in% .x[[1]] & species_fao_code %in% .x[[2]])))
227+
dataframe3_select_species <- do.call(rbind.data.frame, dataframe3_select_species)
228+
## Selection all species when the list is not available for the country
229+
dataframe3 <- rbind(dataframe3_select_species, dataframe3 %>% dplyr::filter(!(country_fleetcountry %in% names(country_species))))
230+
# Calculation of the sum of weights caught per trip (Management of NA: if known value performs the sum of the values and ignores the NA, if no known value indicates NA)
231+
dataframe3 <- dataframe3 %>%
232+
dplyr::group_by(trip_id) %>%
233+
dplyr::summarise(sum_landing_weight = ifelse(all(is.na(landing_weight)), landing_weight[NA_integer_], sum(landing_weight, na.rm = TRUE)))
234+
# Merge data
235+
dataframe4 <- merge(dataframe4, dataframe3, by.x = "trip_id", by.y = "trip_id", all.x = TRUE)
236+
# Add of a logic that indicates whether the full trip is finished or not
237+
dataframe4$logical_full_trip <- !is.na(dataframe4$trip_end_full_trip_id)
238+
# For unfinished full trips (no end-of-full-trip id) indicates the vessel id for the end-of-full-trip id (for each ship, allows you to group together all the trips of the non-finished full trip)
239+
dataframe4[is.na(dataframe4$trip_end_full_trip_id), "trip_end_full_trip_id"] <- paste0("vessel_id_", dataframe4[is.na(dataframe4$trip_end_full_trip_id), "vessel_id", drop = TRUE])
240+
# RF1 calculation
241+
full_trip_id_data_rf1 <- dataframe4 %>%
242+
dplyr::group_by(trip_end_full_trip_id) %>%
243+
dplyr::summarise(rf1 = ifelse(all(is.na(sum_landing_weight)), sum_landing_weight[NA_integer_], sum(sum_landing_weight, na.rm = TRUE)) / ifelse(all(is.na(sum_catch_weight)), sum_catch_weight[NA_integer_], sum(sum_catch_weight, na.rm = TRUE)), full_trip_sum_landing_weight = ifelse(all(is.na(sum_landing_weight)), sum_landing_weight[NA_integer_], sum(sum_landing_weight, na.rm = TRUE)), full_trip_sum_catch_weight = ifelse(all(is.na(sum_catch_weight)), sum_catch_weight[NA_integer_], sum(sum_catch_weight, na.rm = TRUE)))
244+
dataframe4$lower_threshold <- threshold[1]
245+
dataframe4$upper_threshold <- threshold[2]
246+
# Selection of user-supplied trips
247+
dataframe4 <- merge(data.frame(trip_id = dataframe1$trip_id), unique(dataframe4), by.x = "trip_id", by.y = "trip_id", all.x = TRUE)
248+
# Merge data
249+
dataframe4 <- merge(dataframe4, full_trip_id_data_rf1, by.x = "trip_end_full_trip_id", by.y = "trip_end_full_trip_id", all.x = TRUE)
250+
# Compare RF1 to valid threshold
251+
comparison_less <- codama::vector_comparison(
252+
first_vector = dataframe4$rf1,
253+
second_vector = dataframe4$upper_threshold,
254+
comparison_type = "less_equal",
255+
output = "report"
256+
)
257+
comparison_greater <- codama::vector_comparison(
258+
first_vector = dataframe4$rf1,
259+
second_vector = dataframe4$lower_threshold,
260+
comparison_type = "greater_equal",
261+
output = "report"
262+
)
263+
dataframe4$logical <- comparison_less$logical & comparison_greater$logical
264+
# Corrects missing RF1s when nothing has been landed and there is no capture
265+
dataframe4[(is.na(dataframe4$full_trip_sum_landing_weight) | dataframe4$full_trip_sum_landing_weight == 0) & is.na(dataframe4$full_trip_sum_catch_weight), "logical"] <- TRUE
266+
dataframe4 <- dplyr::relocate(.data = dataframe4, rf1, .after = logical)
267+
dataframe4 <- subset(dataframe4, select = -c(trip_end_full_trip_id, logical_full_trip, sum_catch_weight, sum_landing_weight, full_trip_sum_landing_weight, full_trip_sum_catch_weight, lower_threshold, upper_threshold, vessel_id, country_fleetcountry))
268+
if ((sum(dataframe4$logical, na.rm = TRUE) + sum(!dataframe4$logical, na.rm = TRUE)) != nrow_first || sum(is.na(dataframe4$logical)) > 0) {
269+
all <- c(select, dataframe4$trip_id)
270+
number_occurrences <- table(all)
271+
text <- ""
272+
if (sum(number_occurrences == 1) > 0) {
273+
text <- paste0(text, "Missing item ", "(", sum(number_occurrences == 1), "):", paste0(names(number_occurrences[number_occurrences == 1]), collapse = ", "), "\n")
274+
}
275+
if (sum(number_occurrences > 2) > 0) {
276+
text <- paste0(text, "Too many item ", "(", sum(number_occurrences > 2), "):", paste0(names(number_occurrences[number_occurrences > 2]), collapse = ", "))
277+
}
278+
if (sum(is.na(dataframe4$logical)) > 0) {
279+
text <- paste0(text, "Unknown control result", "(", sum(is.na(dataframe4$logical)), "):", paste0(dataframe4$trip_id[is.na(dataframe4$logical)], collapse = ", "))
280+
}
281+
warning(
282+
format(
283+
x = Sys.time(),
284+
format = "%Y-%m-%d %H:%M:%S"
285+
),
286+
" - your data has some peculiarities that prevent the verification of inconsistencies.\n",
287+
text,
288+
sep = ""
289+
)
290+
}
291+
# 3 - Export ----
292+
if (output == "message") {
293+
return(print(paste0("There are ", sum(!dataframe4$logical), " trips with RF1 outside defined thresholds or missing")))
294+
}
295+
if (output == "report") {
296+
return(dataframe4)
297+
}
298+
if (output == "logical") {
299+
if (sum(!dataframe4$logical) == 0) {
300+
return(TRUE)
301+
} else {
302+
return(FALSE)
303+
}
304+
}
305+
}

0 commit comments

Comments
 (0)