Skip to content

Commit

Permalink
Merge pull request #6 from RMI-PACTA/add-match-prio
Browse files Browse the repository at this point in the history
add run_match_prioritize
  • Loading branch information
jacobvjk authored Apr 3, 2024
2 parents d4e8402 + 4486b59 commit 87162d9
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 5 deletions.
5 changes: 2 additions & 3 deletions example.config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,5 @@ default:
use_own_sector_classification: FALSE
dir_own_sector_classification: "path/to/own_sector_classification_folder"
filename_own_sector_classification: "own_sector_classification.csv"



match_prioritize:
priority: NULL
4 changes: 2 additions & 2 deletions expected_columns.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ col_types_matched_prio_all_groups <- readr::cols_only(
col_select_matched_prio_all_groups <- names(col_types_matched_prio_all_groups[["cols"]])

# expected columns matched_manual file
col_types_matched_all_groups <- readr::cols_only(
col_types_matched_manual <- readr::cols_only(
group_id = "c",
id_loan = "c",
id_direct_loantaker = "c",
Expand Down Expand Up @@ -115,7 +115,7 @@ col_types_matched_all_groups <- readr::cols_only(
source = "c",
borderline = "l"
)
col_select_matched_all_groups <- names(col_types_matched_all_groups[["cols"]])
col_select_matched_manual <- names(col_types_matched_manual[["cols"]])

# expected columns raw loan book file
col_types_raw <- readr::cols(
Expand Down
69 changes: 69 additions & 0 deletions run_match_prioritize.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# set up project and load packages----
library(dplyr, warn.conflicts = FALSE)
library(r2dii.match)
library(readr)
library(vroom)

# source helpers----
source("expected_columns.R")

# load config----
# Read the "directories" and "match_prioritize" sections of the active config.
# `[[ ]]` is used instead of `$` so that a similarly named key is never picked
# up through partial matching.
config_dir <- config::get("directories")
dir_matched <- config_dir[["dir_matched"]]

config_match_prio <- config::get("match_prioritize")
match_prio_priority <- config_match_prio[["priority"]]

# validate config values----
# dir_matched is used below as a single directory path, so it must be exactly
# one character string.
if (length(dir_matched) != 1) {
  stop("Argument dir_matched must be of length 1. Please check your input.")
}
if (!inherits(dir_matched, "character")) {
  stop("Argument dir_matched must be of class character. Please check your input.")
}

# priority is optional: NULL is accepted and passed on unchanged. Any other
# value must be a character vector, a formula or a function.
if (!is.null(match_prio_priority)) {
  # inherits() with several class names is TRUE if any of them matches, which
  # yields a single scalar condition for `if`.
  priority_class_ok <- inherits(
    match_prio_priority,
    c("character", "formula", "function")
  )
  if (!priority_class_ok) {
    # Collapse the class vector so objects with several classes still produce
    # one message instead of one message per class.
    stop(
      glue::glue(
        "Argument match_prio_priority must be of one of: a character vector, a
        function, or a quosure-style lambda function. Your input is of class
        {toString(class(match_prio_priority))}. Please check your input."
      )
    )
  }
}

# load manually matched files----
# Collect every file in dir_matched named "matched_lbk_<anything>_manual.csv".
# The dot before "csv" is escaped so it only matches a literal ".".
list_matched_manual <- list.files(
  path = dir_matched,
  pattern = "^matched_lbk_.*_manual\\.csv$"
)

if (length(list_matched_manual) == 0) {
  stop(glue::glue("No manually matched loan book csvs found in {dir_matched}. Please check your project setup!"))
}

# Read all manually matched files into one table, using the column
# specification that belongs to the manually matched files (from
# expected_columns.R) rather than the raw loan book specification, then split
# the result into one data frame per group_id.
matched_lbk_manual <- vroom::vroom(
  file = file.path(dir_matched, list_matched_manual),
  col_types = col_types_matched_manual,
  col_select = dplyr::all_of(col_select_matched_manual)
) %>%
  dplyr::group_split(.data$group_id)

# prioritize and save files----
# Process each per-group loan book in turn. seq_along() is used so that an
# empty list results in zero iterations (1:length(x) would iterate over 1, 0).
for (i in seq_along(matched_lbk_manual)) {
  # Each list element holds the rows of one group, so this is that group's id.
  group_name <- unique(matched_lbk_manual[[i]]$group_id)

  ## prioritize matched loan book----
  # Apply r2dii.match::prioritize() with the configured priority, then set
  # group_id on the result to the current group's id.
  matched_prio_i <- matched_lbk_manual[[i]] %>%
    r2dii.match::prioritize(priority = match_prio_priority) %>%
    dplyr::mutate(group_id = .env$group_name)

  ## write matched prioritized loan book to file----
  # One csv per group, written next to the input files; NA is written as an
  # empty field.
  matched_prio_i %>%
    readr::write_csv(
      file = file.path(dir_matched, glue::glue("matched_prio_{group_name}.csv")),
      na = ""
    )
}

0 comments on commit 87162d9

Please sign in to comment.