Skip to content

Commit

Permalink
Merge pull request #1 from RMI-PACTA/add-matching-script
Browse files Browse the repository at this point in the history
add matching script
  • Loading branch information
jacobvjk authored Apr 2, 2024
2 parents 33ed02d + 753b1cc commit d4e8402
Show file tree
Hide file tree
Showing 4 changed files with 406 additions and 5 deletions.
7 changes: 2 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,8 @@ Imports:
pacta.multi.loanbook.plot,
readr,
rlang,
tidyr
Remotes:
RMI-PACTA/pacta.data.preparation,
RMI-PACTA/pacta.data.scraping,
RMI-PACTA/pacta.scenario.preparation
tidyr,
vroom
Depends:
R (>= 4.1.0)
License: MIT + file LICENSE
47 changes: 47 additions & 0 deletions example.config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Example configuration. All "path/to/..." values and file names below are
# placeholders and need to be replaced with real locations.
default:
# placeholder folder paths
directories:
dir_scenario: "path/to/scenario_folder"
dir_abcd: "path/to/abcd_folder"
dir_raw: "path/to/raw_folder"
dir_matched: "path/to/matched_folder"
dir_output: "path/to/output_folder"
# file names of the inputs
# NOTE(review): presumably resolved relative to the folders above - confirm
file_names:
filename_raw: "raw_loanbook_123.csv"
filename_scenario_tms: "scenarios_2022_tms.csv"
filename_scenario_sda: "scenarios_2022_sda.csv"
filename_abcd: "abcd.csv"
project_parameters:
scenario_source: "weo_2022"
scenario_select: "nze_2050"
region_select: "global"
# normally the start year should correspond to the year of publication of
# the scenario in use
start_year_select: 2022
time_frame_select: 5
# regions must be available for the selected scenario; several regions are
# given as one comma-separated string
benchmark_regions_select: "global,european union"
remove_inactive_companies: TRUE
sector_split:
apply_sector_split: TRUE
# the commented-out line below shows an alternative value for
# sector_split_type
sector_split_type: "equal_weights"
# sector_split_type: "worst_case"
dir_split_company_id: "path/to/split_folder"
filename_split_company_id: "split_company_ids.csv"
dir_advanced_company_indicators: "path/to/advanced_company_indicators_folder"
filename_advanced_company_indicators: "advanced_company_indicators.xlsx"
matching:
prep_input_level: "direct_loantaker"
# NOTE(review): these keys look like the arguments of a name-matching
# function (presumably r2dii.match::match_name()) - confirm against the
# matching script
params_match_name:
by_sector: TRUE
min_score: 0.9
method: "jw"
p: 0.1
overwrite: NULL
join_id: NULL
# switched off in this example; the folder and file name below are
# placeholders
own_sector_classification:
use_own_sector_classification: FALSE
dir_own_sector_classification: "path/to/own_sector_classification_folder"
filename_own_sector_classification: "own_sector_classification.csv"



157 changes: 157 additions & 0 deletions expected_columns.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Column specification for the region isos file: only the columns listed here
# are read, all of them as character.
col_types_region_isos <- readr::cols_only(
  region = readr::col_character(),
  isos = readr::col_character(),
  source = readr::col_character()
)
# Names of the expected columns, in the order declared above.
col_select_region_isos <- names(
  col_types_region_isos[["cols"]]
)

# Column specification for the tms scenario file: only the columns listed here
# are read. `year` is parsed as integer, `smsp` and `tmsr` as numbers, the
# remaining columns as character.
col_types_scenario_tms <- readr::cols_only(
  scenario_source = readr::col_character(),
  region = readr::col_character(),
  scenario = readr::col_character(),
  sector = readr::col_character(),
  technology = readr::col_character(),
  year = readr::col_integer(),
  smsp = readr::col_number(),
  tmsr = readr::col_number()
)
# Names of the expected columns, in the order declared above.
col_select_scenario_tms <- names(
  col_types_scenario_tms[["cols"]]
)

# Column specification for the sda scenario file: only the columns listed here
# are read. `year` is parsed as integer, `emission_factor` as a number, the
# remaining columns as character.
col_types_scenario_sda <- readr::cols_only(
  scenario_source = readr::col_character(),
  region = readr::col_character(),
  scenario = readr::col_character(),
  sector = readr::col_character(),
  year = readr::col_integer(),
  emission_factor = readr::col_number(),
  emission_factor_unit = readr::col_character()
)
# Names of the expected columns, in the order declared above.
col_select_scenario_sda <- names(
  col_types_scenario_sda[["cols"]]
)

# Column specification for the abcd file: only the columns listed here are
# read. `company_id` and `year` are parsed as integer, `is_ultimate_owner` as
# logical, `production` and `emission_factor` as numbers, and the remaining
# columns (including `ald_timestamp`) as character.
col_types_abcd <- readr::cols_only(
  company_id = readr::col_integer(),
  name_company = readr::col_character(),
  lei = readr::col_character(),
  is_ultimate_owner = readr::col_logical(),
  sector = readr::col_character(),
  technology = readr::col_character(),
  plant_location = readr::col_character(),
  year = readr::col_integer(),
  production = readr::col_number(),
  production_unit = readr::col_character(),
  emission_factor = readr::col_number(),
  emission_factor_unit = readr::col_character(),
  ald_timestamp = readr::col_character()
)
# Names of the expected columns, in the order declared above.
col_select_abcd <- names(
  col_types_abcd[["cols"]]
)

# Column specification for the matched_prioritized_all_groups file: only the
# columns listed here are read. The two loan size columns and `score` are
# parsed as numbers, `borderline` as logical, and every other column
# (including all id columns) as character.
col_types_matched_prio_all_groups <- readr::cols_only(
  group_id = readr::col_character(),
  id_loan = readr::col_character(),
  id_direct_loantaker = readr::col_character(),
  name_direct_loantaker = readr::col_character(),
  id_intermediate_parent_1 = readr::col_character(),
  name_intermediate_parent_1 = readr::col_character(),
  id_ultimate_parent = readr::col_character(),
  name_ultimate_parent = readr::col_character(),
  loan_size_outstanding = readr::col_number(),
  loan_size_outstanding_currency = readr::col_character(),
  loan_size_credit_limit = readr::col_number(),
  loan_size_credit_limit_currency = readr::col_character(),
  sector_classification_system = readr::col_character(),
  sector_classification_input_type = readr::col_character(),
  sector_classification_direct_loantaker = readr::col_character(),
  fi_type = readr::col_character(),
  flag_project_finance_loan = readr::col_character(),
  name_project = readr::col_character(),
  lei_direct_loantaker = readr::col_character(),
  isin_direct_loantaker = readr::col_character(),
  id_2dii = readr::col_character(),
  level = readr::col_character(),
  sector = readr::col_character(),
  sector_abcd = readr::col_character(),
  name = readr::col_character(),
  name_abcd = readr::col_character(),
  score = readr::col_number(),
  source = readr::col_character(),
  borderline = readr::col_logical()
)
# Names of the expected columns, in the order declared above.
col_select_matched_prio_all_groups <- names(
  col_types_matched_prio_all_groups[["cols"]]
)

# Column specification for the matched_all_groups file: only the columns
# listed here are read. The columns and types are currently the same as in the
# specification for the matched_prioritized_all_groups file; the two are
# defined separately, so keep them in sync when either one changes.
col_types_matched_all_groups <- readr::cols_only(
  group_id = readr::col_character(),
  id_loan = readr::col_character(),
  id_direct_loantaker = readr::col_character(),
  name_direct_loantaker = readr::col_character(),
  id_intermediate_parent_1 = readr::col_character(),
  name_intermediate_parent_1 = readr::col_character(),
  id_ultimate_parent = readr::col_character(),
  name_ultimate_parent = readr::col_character(),
  loan_size_outstanding = readr::col_number(),
  loan_size_outstanding_currency = readr::col_character(),
  loan_size_credit_limit = readr::col_number(),
  loan_size_credit_limit_currency = readr::col_character(),
  sector_classification_system = readr::col_character(),
  sector_classification_input_type = readr::col_character(),
  sector_classification_direct_loantaker = readr::col_character(),
  fi_type = readr::col_character(),
  flag_project_finance_loan = readr::col_character(),
  name_project = readr::col_character(),
  lei_direct_loantaker = readr::col_character(),
  isin_direct_loantaker = readr::col_character(),
  id_2dii = readr::col_character(),
  level = readr::col_character(),
  sector = readr::col_character(),
  sector_abcd = readr::col_character(),
  name = readr::col_character(),
  name_abcd = readr::col_character(),
  score = readr::col_number(),
  source = readr::col_character(),
  borderline = readr::col_logical()
)
# Names of the expected columns, in the order declared above.
col_select_matched_all_groups <- names(
  col_types_matched_all_groups[["cols"]]
)

# Column specification for the raw loan book file. This one is built with
# readr::cols() rather than readr::cols_only(), so columns that are not listed
# here are not dropped; their type falls back to the default of readr::cols().
# The two loan size columns are parsed as numbers, everything else listed here
# as character.
col_types_raw <- readr::cols(
  id_loan = readr::col_character(),
  id_direct_loantaker = readr::col_character(),
  name_direct_loantaker = readr::col_character(),
  id_intermediate_parent_1 = readr::col_character(),
  name_intermediate_parent_1 = readr::col_character(),
  id_ultimate_parent = readr::col_character(),
  name_ultimate_parent = readr::col_character(),
  loan_size_outstanding = readr::col_number(),
  loan_size_outstanding_currency = readr::col_character(),
  loan_size_credit_limit = readr::col_number(),
  loan_size_credit_limit_currency = readr::col_character(),
  sector_classification_system = readr::col_character(),
  sector_classification_input_type = readr::col_character(),
  sector_classification_direct_loantaker = readr::col_character(),
  fi_type = readr::col_character(),
  flag_project_finance_loan = readr::col_character(),
  name_project = readr::col_character(),
  lei_direct_loantaker = readr::col_character(),
  isin_direct_loantaker = readr::col_character()
)

# Column specification for the companies_sector_split file: only the columns
# listed here are read. Companies are identified by an integer `company_id`;
# `sector_split` is parsed as a number.
col_types_companies_sector_split <- readr::cols_only(
  company_id = readr::col_integer(),
  sector = readr::col_character(),
  sector_split = readr::col_number()
)
# Names of the expected columns, in the order declared above.
col_select_companies_sector_split <- names(
  col_types_companies_sector_split[["cols"]]
)

# Column specification for the companies_sector_split_worst_case file: only
# the columns listed here are read. Unlike the companies_sector_split spec,
# companies are identified by a character `name_company` column here;
# `sector_split` is parsed as a number.
col_types_companies_sector_split_worst_case <- readr::cols_only(
  name_company = readr::col_character(),
  sector = readr::col_character(),
  sector_split = readr::col_number()
)
# Names of the expected columns, in the order declared above.
col_select_companies_sector_split_worst_case <- names(
  col_types_companies_sector_split_worst_case[["cols"]]
)
Loading

0 comments on commit d4e8402

Please sign in to comment.