Merge pull request #6 from RMI-PACTA/add-match-prio

add run_match_prioritize
RMI-PACTA · Apr 3, 2024 · 87162d9 · 87162d9
2 parents d4e8402 + 4486b59
commit 87162d9
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 5 deletions.
diff --git a/example.config.yml b/example.config.yml
@@ -42,6 +42,5 @@ default:
       use_own_sector_classification: FALSE
       dir_own_sector_classification: "path/to/own_sector_classification_folder"
       filename_own_sector_classification: "own_sector_classification.csv"
-
-
-
+  match_prioritize:
+    priority: NULL
diff --git a/expected_columns.R b/expected_columns.R
@@ -84,7 +84,7 @@ col_types_matched_prio_all_groups <- readr::cols_only(
 col_select_matched_prio_all_groups <- names(col_types_matched_prio_all_groups[["cols"]])
 
 # expected columns matched_all_groups file
-col_types_matched_all_groups <- readr::cols_only(
+col_types_matched_manual <- readr::cols_only(
   group_id = "c",
   id_loan = "c",
   id_direct_loantaker = "c",
@@ -115,7 +115,7 @@ col_types_matched_all_groups <- readr::cols_only(
   source = "c",
   borderline = "l"
 )
-col_select_matched_all_groups <- names(col_types_matched_all_groups[["cols"]])
+col_select_matched_manual <- names(col_types_matched_manual[["cols"]])
 
 # expected columns raw loan book file
 col_types_raw <- readr::cols(

diff --git a/run_match_prioritize.R b/run_match_prioritize.R
@@ -0,0 +1,69 @@
+# set up project and load packages----
+library(dplyr, warn.conflicts = FALSE)
+library(r2dii.match)
+library(readr)
+library(vroom)
+
+# source helpers----
+source("expected_columns.R")
+
+# load config----
+# Read the directory holding the matched loan books and the optional
+# prioritisation setting from the active configuration.
+config_dir <- config::get("directories")
+dir_matched <- config_dir$dir_matched
+
+config_match_prio <- config::get("match_prioritize")
+match_prio_priority <- config_match_prio$priority
+
+# validate config values----
+# dir_matched is used to build file paths below, so it has to be exactly one
+# character string.
+if (length(dir_matched) != 1) {
+  stop("Argument dir_matched must be of length 1. Please check your input.")
+}
+if (!inherits(dir_matched, "character")) {
+  stop("Argument dir_matched must be of class character. Please check your input.")
+}
+# The priority setting may be left unset (NULL); the class check only applies
+# when a value was provided.
+if (!is.null(match_prio_priority)) {
+  # inherits() with several candidate classes returns a single logical that is
+  # TRUE when any of them matches, which keeps the if() condition scalar.
+  if (!inherits(match_prio_priority, c("character", "formula", "function"))) {
+    # toString() collapses the class vector, so an object carrying more than
+    # one class still produces exactly one error message.
+    stop(
+      glue::glue(
+        "Argument match_prio_priority must be of one of: a character vector, a
+        function, or a quosure-style lambda function. Your input is of class
+        {toString(class(match_prio_priority))}. Please check your input."
+      )
+    )
+  }
+}
+
+# load manually matched files----
+# Collect the manually matched loan book csvs found in dir_matched. The dot in
+# front of "csv" is escaped so that only a literal ".csv" extension matches.
+list_matched_manual <- list.files(
+  path = dir_matched,
+  pattern = "^matched_lbk_.*_manual\\.csv$"
+)
+
+if (length(list_matched_manual) == 0) {
+  stop(glue::glue("No manually matched loan book csvs found in {dir_matched}. Please check your project setup!"))
+}
+
+# Read the files with the column specification that expected_columns.R
+# declares for manually matched loan books (col_types_matched_manual), which
+# belongs to the same definition as the col_select vector used here; the raw
+# loan book specification does not describe these files. The result is split
+# into one data frame per group_id.
+matched_lbk_manual <- vroom::vroom(
+  file = file.path(dir_matched, list_matched_manual),
+  col_types = col_types_matched_manual,
+  col_select = dplyr::all_of(col_select_matched_manual)
+) %>%
+  dplyr::group_split(.data$group_id)
+
+# prioritize and save files----
+# seq_along() yields an empty sequence for an empty list, so the loop body is
+# skipped instead of indexing elements that do not exist (1:length(x) would
+# iterate over 1 and 0 in that case).
+for (i in seq_along(matched_lbk_manual)) {
+  # every list element holds the rows of a single group, because the table was
+  # split by group_id
+  group_name <- unique(matched_lbk_manual[[i]]$group_id)
+
+  ## prioritize matched loan book----
+  # group_id is (re)assigned after prioritize() so that the written file
+  # carries the group identifier
+  matched_prio_i <- matched_lbk_manual[[i]] %>%
+    r2dii.match::prioritize(priority = match_prio_priority) %>%
+    dplyr::mutate(group_id = .env$group_name)
+
+  ## write matched prioritized loan book to file----
+  # one csv per group, written next to the input files; NA is written as an
+  # empty field
+  matched_prio_i %>%
+    readr::write_csv(
+      file = file.path(dir_matched, glue::glue("matched_prio_{group_name}.csv")),
+      na = ""
+    )
+}