|
| 1 | +#' @name check_raising_factor_inspector |
| 2 | +#' @title Gives the inconsistencies between RF1 and threshold values |
| 3 | +#' @description The purpose of the check_raising_factor_inspector function is to provide a table of data that contains an inconsistency with the RF1 and the valid threshold (Default : 0.9 ; 1.1) |
| 4 | +#' @param dataframe1 {\link[base]{data.frame}} expected. Csv or output of the function {\link[furdeb]{data_extraction}}, which must be done before using the check_raising_factor_inspector () function. |
| 5 | +#' @param dataframe2 {\link[base]{data.frame}} expected. Csv or output of the function {\link[furdeb]{data_extraction}}, which must be done before using the check_raising_factor_inspector () function. |
| 6 | +#' @param dataframe3 {\link[base]{data.frame}} expected. Csv or output of the function {\link[furdeb]{data_extraction}}, which must be done before using the check_raising_factor_inspector () function. |
| 7 | +#' @param dataframe4 {\link[base]{data.frame}} expected. Csv or output of the function {\link[furdeb]{data_extraction}}, which must be done before using the check_raising_factor_inspector () function. |
| 8 | +#' @param output {\link[base]{character}} expected. Kind of expected output. You can choose between "message", "report" or "logical". |
| 9 | +#' @param country_species {\link[base]{character}} expected. Default values: list("1" = c("TUN", "ALB", "YFT", "BET", "SKJ"), "4" = c("LOT", "TUN", "ALB", "YFT", "BET", "SKJ", "LTA", "FRI", "BLF", "RAV*", "KAW", "FRZ", "BLT")). list of the inventory of species (FAO code) used to calculate catch weight in RF1 by country (country code). |
| 10 | +#' @param speciesfate {\link[base]{character}} expected. Default values: "6". Vector of inventory of fate used to calculate catch weight in RF1. |
| 11 | +#' @param vesselactivity {\link[base]{character}} expected. Default values: c("23", "25", "27", "29"). Vector of inventory of vessel activity NOT used to calculate catch weight in RF1. |
| 12 | +#' @param threshold {\link[base]{numeric}} expected. Default values: 0.9 and 1.1. Vector containing the lower and upper acceptable threshold for RF1. |
| 13 | +#' @return The function returns a {\link[base]{character}} with output is "message", a {\link[base]{data.frame}} with output is "report", a {\link[base]{logical}} with output is "logical" |
| 14 | +#' @details |
| 15 | +#' The input dataframe must contain all these columns for the function to work : |
| 16 | +#' \itemize{ |
| 17 | +#' Dataframe 1: |
| 18 | +#' \item{\code{ trip_id}} |
| 19 | +#' Dataframe 2: |
| 20 | +#' \item{\code{ catch_id}} |
| 21 | +#' \item{\code{ catch_weight}} |
| 22 | +#' \item{\code{ speciesfate_code}} |
| 23 | +#' \item{\code{ species_fao_code}} |
| 24 | +#' \item{\code{ vesselactivity_code}} |
| 25 | +#' \item{\code{ trip_id}} |
| 26 | +#' Dataframe 3: |
| 27 | +#' \item{\code{ landing_id}} |
| 28 | +#' \item{\code{ landing_weight}} |
| 29 | +#' \item{\code{ species_fao_code}} |
| 30 | +#' \item{\code{ trip_id}} |
| 31 | +#' Dataframe 4: |
| 32 | +#' \item{\code{ trip_id}} |
| 33 | +#' \item{\code{ trip_end_full_trip_id}} |
| 34 | +#' \item{\code{ vessel_id}} |
| 35 | +#' \item{\code{ country_fleetcountry}} |
| 36 | +#' } |
| 37 | +#' @export |
| 38 | +check_raising_factor_inspector <- function(dataframe1, |
| 39 | + dataframe2, |
| 40 | + dataframe3, |
| 41 | + dataframe4, |
| 42 | + output, |
| 43 | + country_species = list("1" = c("TUN", "ALB", "YFT", "BET", "SKJ"), "4" = c("LOT", "TUN", "ALB", "YFT", "BET", "SKJ", "LTA", "FRI", "BLF", "RAV*", "KAW", "FRZ", "BLT")), |
| 44 | + speciesfate = "6", |
| 45 | + vesselactivity = c("23", "25", "27", "29"), |
| 46 | + threshold = c(0.9, 1.1)) { |
| 47 | + # 0 - Global variables assignement ---- |
| 48 | + trip_id <- NULL |
| 49 | + catch_weight <- NULL |
| 50 | + sum_catch_weight <- NULL |
| 51 | + rf1 <- NULL |
| 52 | + full_trip_sum_landing_weight <- NULL |
| 53 | + full_trip_sum_catch_weight <- NULL |
| 54 | + lower_threshold <- NULL |
| 55 | + upper_threshold <- NULL |
| 56 | + speciesfate_code <- NULL |
| 57 | + vesselactivity_code <- NULL |
| 58 | + country_fleetcountry <- NULL |
| 59 | + landing_weight <- NULL |
| 60 | + sum_landing_weight <- NULL |
| 61 | + trip_end_full_trip_id <- NULL |
| 62 | + vessel_id <- NULL |
| 63 | + logical_full_trip <- NULL |
| 64 | + # 1 - Arguments verification ---- |
| 65 | + if (!codama::r_table_checking( |
| 66 | + r_table = dataframe1, |
| 67 | + type = "data.frame", |
| 68 | + column_name = c("trip_id"), |
| 69 | + column_type = c("character"), |
| 70 | + output = "logical" |
| 71 | + )) { |
| 72 | + codama::r_table_checking( |
| 73 | + r_table = dataframe1, |
| 74 | + type = "data.frame", |
| 75 | + column_name = c("trip_id"), |
| 76 | + column_type = c("character"), |
| 77 | + output = "message" |
| 78 | + ) |
| 79 | + } else { |
| 80 | + dataframe1 <- dataframe1[, c("trip_id")] |
| 81 | + } |
| 82 | + if (!codama::r_table_checking( |
| 83 | + r_table = dataframe2, |
| 84 | + type = "data.frame", |
| 85 | + column_name = c("catch_id", "catch_weight", "speciesfate_code", "species_fao_code", "vesselactivity_code", "trip_id"), |
| 86 | + column_type = c("character", "numeric", "character", "character", "character", "character"), |
| 87 | + output = "logical" |
| 88 | + )) { |
| 89 | + codama::r_table_checking( |
| 90 | + r_table = dataframe2, |
| 91 | + type = "data.frame", |
| 92 | + column_name = c("catch_id", "catch_weight", "speciesfate_code", "species_fao_code", "vesselactivity_code", "trip_id"), |
| 93 | + column_type = c("character", "numeric", "character", "character", "character", "character"), |
| 94 | + output = "message" |
| 95 | + ) |
| 96 | + } else { |
| 97 | + dataframe2 <- dataframe2[, c("catch_id", "catch_weight", "speciesfate_code", "species_fao_code", "vesselactivity_code", "trip_id")] |
| 98 | + } |
| 99 | + if (!codama::r_table_checking( |
| 100 | + r_table = dataframe3, |
| 101 | + type = "data.frame", |
| 102 | + column_name = c("landing_id", "landing_weight", "species_fao_code", "trip_id"), |
| 103 | + column_type = c("character", "numeric", "character", "character"), |
| 104 | + output = "logical" |
| 105 | + )) { |
| 106 | + codama::r_table_checking( |
| 107 | + r_table = dataframe3, |
| 108 | + type = "data.frame", |
| 109 | + column_name = c("landing_id", "landing_weight", "species_fao_code", "trip_id"), |
| 110 | + column_type = c("character", "numeric", "character", "character"), |
| 111 | + output = "message" |
| 112 | + ) |
| 113 | + } else { |
| 114 | + dataframe3 <- dataframe3[, c("landing_id", "landing_weight", "species_fao_code", "trip_id")] |
| 115 | + } |
| 116 | + if (!codama::r_table_checking( |
| 117 | + r_table = dataframe4, |
| 118 | + type = "data.frame", |
| 119 | + column_name = c("trip_id", "trip_end_full_trip_id", "vessel_id", "country_fleetcountry"), |
| 120 | + column_type = c("character", "character", "character", "character"), |
| 121 | + output = "logical" |
| 122 | + )) { |
| 123 | + codama::r_table_checking( |
| 124 | + r_table = dataframe4, |
| 125 | + type = "data.frame", |
| 126 | + column_name = c("trip_id", "trip_end_full_trip_id", "vessel_id", "country_fleetcountry"), |
| 127 | + column_type = c("character", "character", "character", "character"), |
| 128 | + output = "message" |
| 129 | + ) |
| 130 | + } else { |
| 131 | + dataframe4 <- dataframe4[, c("trip_id", "trip_end_full_trip_id", "vessel_id", "country_fleetcountry")] |
| 132 | + } |
| 133 | + # Checks the type and values of output |
| 134 | + if (!codama::r_type_checking( |
| 135 | + r_object = output, |
| 136 | + type = "character", |
| 137 | + allowed_value = c("message", "report", "logical"), |
| 138 | + output = "logical" |
| 139 | + )) { |
| 140 | + return(codama::r_type_checking( |
| 141 | + r_object = output, |
| 142 | + type = "character", |
| 143 | + allowed_value = c("message", "report", "logical"), |
| 144 | + output = "message" |
| 145 | + )) |
| 146 | + } |
| 147 | + # Checks the type of country_species |
| 148 | + if (!codama::r_type_checking( |
| 149 | + r_object = country_species, |
| 150 | + type = "list", |
| 151 | + output = "logical" |
| 152 | + )) { |
| 153 | + return(codama::r_type_checking( |
| 154 | + r_object = country_species, |
| 155 | + type = "list", |
| 156 | + output = "message" |
| 157 | + )) |
| 158 | + } |
| 159 | + # Checks the type of speciesfate |
| 160 | + if (!codama::r_type_checking( |
| 161 | + r_object = speciesfate, |
| 162 | + type = "character", |
| 163 | + output = "logical" |
| 164 | + )) { |
| 165 | + return(codama::r_type_checking( |
| 166 | + r_object = speciesfate, |
| 167 | + type = "character", |
| 168 | + output = "message" |
| 169 | + )) |
| 170 | + } |
| 171 | + # Checks the type of vesselactivity |
| 172 | + if (!codama::r_type_checking( |
| 173 | + r_object = vesselactivity, |
| 174 | + type = "character", |
| 175 | + output = "logical" |
| 176 | + )) { |
| 177 | + return(codama::r_type_checking( |
| 178 | + r_object = vesselactivity, |
| 179 | + type = "character", |
| 180 | + output = "message" |
| 181 | + )) |
| 182 | + } |
| 183 | + # Checks the type of threshold |
| 184 | + if (!codama::r_type_checking( |
| 185 | + r_object = threshold, |
| 186 | + type = "numeric", |
| 187 | + length = 2L, |
| 188 | + output = "logical" |
| 189 | + )) { |
| 190 | + return(codama::r_type_checking( |
| 191 | + r_object = threshold, |
| 192 | + type = "numeric", |
| 193 | + length = 2L, |
| 194 | + output = "message" |
| 195 | + )) |
| 196 | + } |
| 197 | + select <- dataframe1$trip_id |
| 198 | + nrow_first <- length(unique(select)) |
| 199 | + # 2 - Data design ---- |
| 200 | + # Add country_fleetcountry for catch |
| 201 | + dataframe2 <- merge(dataframe2, unique(dataframe4[, c("trip_id", "country_fleetcountry")]), by = "trip_id", all.x = TRUE) |
| 202 | + # Catch filtration for RF1 |
| 203 | + ## Selection species when the list is available for the country and selection species |
| 204 | + condition <- as.list(as.data.frame(t(data.frame(country = names(country_species), species = I(unname(country_species)))))) |
| 205 | + dataframe2_select_species <- purrr::map(condition, ~ dataframe2 %>% dplyr::filter((country_fleetcountry %in% .x[[1]] & species_fao_code %in% .x[[2]]))) |
| 206 | + dataframe2_select_species <- do.call(rbind.data.frame, dataframe2_select_species) |
| 207 | + ## Selection all species when the list is not available for the country |
| 208 | + dataframe2 <- rbind(dataframe2_select_species, dataframe2 %>% dplyr::filter(!(country_fleetcountry %in% names(country_species)))) |
| 209 | + ## Selection species fate |
| 210 | + dataframe2 <- dataframe2 %>% |
| 211 | + dplyr::filter((speciesfate_code %in% speciesfate)) |
| 212 | + ## Selection vessel activity |
| 213 | + dataframe2 <- dataframe2 %>% |
| 214 | + dplyr::filter(!(vesselactivity_code %in% vesselactivity)) |
| 215 | + # Calculation of the sum of weights caught per trip (Management of NA: if known value performs the sum of the values and ignores the NA, if no known value indicates NA) |
| 216 | + dataframe2 <- dataframe2 %>% |
| 217 | + dplyr::group_by(trip_id) %>% |
| 218 | + dplyr::summarise(sum_catch_weight = ifelse(all(is.na(catch_weight)), catch_weight[NA_integer_], sum(catch_weight, na.rm = TRUE))) |
| 219 | + # Merge data |
| 220 | + dataframe4 <- merge(dataframe4, dataframe2, by.x = "trip_id", by.y = "trip_id", all.x = TRUE) |
| 221 | + # Add country_fleetcountry for landing |
| 222 | + dataframe3 <- merge(dataframe3, unique(dataframe4[, c("trip_id", "country_fleetcountry")]), by = "trip_id", all.x = TRUE) |
| 223 | + # Landing filtration for RF1 |
| 224 | + ## Selection species when the list is available for the country and selection species fate |
| 225 | + condition <- as.list(as.data.frame(t(data.frame(country = names(country_species), species = I(unname(country_species)))))) |
| 226 | + dataframe3_select_species <- purrr::map(condition, ~ dataframe3 %>% dplyr::filter((country_fleetcountry %in% .x[[1]] & species_fao_code %in% .x[[2]]))) |
| 227 | + dataframe3_select_species <- do.call(rbind.data.frame, dataframe3_select_species) |
| 228 | + ## Selection all species when the list is not available for the country |
| 229 | + dataframe3 <- rbind(dataframe3_select_species, dataframe3 %>% dplyr::filter(!(country_fleetcountry %in% names(country_species)))) |
| 230 | + # Calculation of the sum of weights caught per trip (Management of NA: if known value performs the sum of the values and ignores the NA, if no known value indicates NA) |
| 231 | + dataframe3 <- dataframe3 %>% |
| 232 | + dplyr::group_by(trip_id) %>% |
| 233 | + dplyr::summarise(sum_landing_weight = ifelse(all(is.na(landing_weight)), landing_weight[NA_integer_], sum(landing_weight, na.rm = TRUE))) |
| 234 | + # Merge data |
| 235 | + dataframe4 <- merge(dataframe4, dataframe3, by.x = "trip_id", by.y = "trip_id", all.x = TRUE) |
| 236 | + # Add of a logic that indicates whether the full trip is finished or not |
| 237 | + dataframe4$logical_full_trip <- !is.na(dataframe4$trip_end_full_trip_id) |
| 238 | + # For unfinished full trips (no end-of-full-trip id) indicates the vessel id for the end-of-full-trip id (for each ship, allows you to group together all the trips of the non-finished full trip) |
| 239 | + dataframe4[is.na(dataframe4$trip_end_full_trip_id), "trip_end_full_trip_id"] <- paste0("vessel_id_", dataframe4[is.na(dataframe4$trip_end_full_trip_id), "vessel_id", drop = TRUE]) |
| 240 | + # RF1 calculation |
| 241 | + full_trip_id_data_rf1 <- dataframe4 %>% |
| 242 | + dplyr::group_by(trip_end_full_trip_id) %>% |
| 243 | + dplyr::summarise(rf1 = ifelse(all(is.na(sum_landing_weight)), sum_landing_weight[NA_integer_], sum(sum_landing_weight, na.rm = TRUE)) / ifelse(all(is.na(sum_catch_weight)), sum_catch_weight[NA_integer_], sum(sum_catch_weight, na.rm = TRUE)), full_trip_sum_landing_weight = ifelse(all(is.na(sum_landing_weight)), sum_landing_weight[NA_integer_], sum(sum_landing_weight, na.rm = TRUE)), full_trip_sum_catch_weight = ifelse(all(is.na(sum_catch_weight)), sum_catch_weight[NA_integer_], sum(sum_catch_weight, na.rm = TRUE))) |
| 244 | + dataframe4$lower_threshold <- threshold[1] |
| 245 | + dataframe4$upper_threshold <- threshold[2] |
| 246 | + # Selection of user-supplied trips |
| 247 | + dataframe4 <- merge(data.frame(trip_id = dataframe1$trip_id), unique(dataframe4), by.x = "trip_id", by.y = "trip_id", all.x = TRUE) |
| 248 | + # Merge data |
| 249 | + dataframe4 <- merge(dataframe4, full_trip_id_data_rf1, by.x = "trip_end_full_trip_id", by.y = "trip_end_full_trip_id", all.x = TRUE) |
| 250 | + # Compare RF1 to valid threshold |
| 251 | + comparison_less <- codama::vector_comparison( |
| 252 | + first_vector = dataframe4$rf1, |
| 253 | + second_vector = dataframe4$upper_threshold, |
| 254 | + comparison_type = "less_equal", |
| 255 | + output = "report" |
| 256 | + ) |
| 257 | + comparison_greater <- codama::vector_comparison( |
| 258 | + first_vector = dataframe4$rf1, |
| 259 | + second_vector = dataframe4$lower_threshold, |
| 260 | + comparison_type = "greater_equal", |
| 261 | + output = "report" |
| 262 | + ) |
| 263 | + dataframe4$logical <- comparison_less$logical & comparison_greater$logical |
| 264 | + # Corrects missing RF1s when nothing has been landed and there is no capture |
| 265 | + dataframe4[(is.na(dataframe4$full_trip_sum_landing_weight) | dataframe4$full_trip_sum_landing_weight == 0) & is.na(dataframe4$full_trip_sum_catch_weight), "logical"] <- TRUE |
| 266 | + dataframe4 <- dplyr::relocate(.data = dataframe4, rf1, .after = logical) |
| 267 | + dataframe4 <- subset(dataframe4, select = -c(trip_end_full_trip_id, logical_full_trip, sum_catch_weight, sum_landing_weight, full_trip_sum_landing_weight, full_trip_sum_catch_weight, lower_threshold, upper_threshold, vessel_id, country_fleetcountry)) |
| 268 | + if ((sum(dataframe4$logical, na.rm = TRUE) + sum(!dataframe4$logical, na.rm = TRUE)) != nrow_first || sum(is.na(dataframe4$logical)) > 0) { |
| 269 | + all <- c(select, dataframe4$trip_id) |
| 270 | + number_occurrences <- table(all) |
| 271 | + text <- "" |
| 272 | + if (sum(number_occurrences == 1) > 0) { |
| 273 | + text <- paste0(text, "Missing item ", "(", sum(number_occurrences == 1), "):", paste0(names(number_occurrences[number_occurrences == 1]), collapse = ", "), "\n") |
| 274 | + } |
| 275 | + if (sum(number_occurrences > 2) > 0) { |
| 276 | + text <- paste0(text, "Too many item ", "(", sum(number_occurrences > 2), "):", paste0(names(number_occurrences[number_occurrences > 2]), collapse = ", ")) |
| 277 | + } |
| 278 | + if (sum(is.na(dataframe4$logical)) > 0) { |
| 279 | + text <- paste0(text, "Unknown control result", "(", sum(is.na(dataframe4$logical)), "):", paste0(dataframe4$trip_id[is.na(dataframe4$logical)], collapse = ", ")) |
| 280 | + } |
| 281 | + warning( |
| 282 | + format( |
| 283 | + x = Sys.time(), |
| 284 | + format = "%Y-%m-%d %H:%M:%S" |
| 285 | + ), |
| 286 | + " - your data has some peculiarities that prevent the verification of inconsistencies.\n", |
| 287 | + text, |
| 288 | + sep = "" |
| 289 | + ) |
| 290 | + } |
| 291 | + # 3 - Export ---- |
| 292 | + if (output == "message") { |
| 293 | + return(print(paste0("There are ", sum(!dataframe4$logical), " trips with RF1 outside defined thresholds or missing"))) |
| 294 | + } |
| 295 | + if (output == "report") { |
| 296 | + return(dataframe4) |
| 297 | + } |
| 298 | + if (output == "logical") { |
| 299 | + if (sum(!dataframe4$logical) == 0) { |
| 300 | + return(TRUE) |
| 301 | + } else { |
| 302 | + return(FALSE) |
| 303 | + } |
| 304 | + } |
| 305 | +} |
0 commit comments