diff --git a/DESCRIPTION b/DESCRIPTION
index 94176fea..3111cb95 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,11 +21,8 @@ Imports:
     pacta.multi.loanbook.plot,
     readr,
     rlang,
-    tidyr
-Remotes:
-    RMI-PACTA/pacta.data.preparation,
-    RMI-PACTA/pacta.data.scraping,
-    RMI-PACTA/pacta.scenario.preparation
+    tidyr,
+    vroom
 Depends:
     R (>= 4.1.0)
 License: MIT + file LICENSE
diff --git a/example.config.yml b/example.config.yml
new file mode 100644
index 00000000..c2936ef3
--- /dev/null
+++ b/example.config.yml
@@ -0,0 +1,47 @@
+default:
+  directories:
+    dir_scenario: "path/to/scenario_folder"
+    dir_abcd: "path/to/abcd_folder"
+    dir_raw: "path/to/raw_folder"
+    dir_matched: "path/to/matched_folder"
+    dir_output: "path/to/output_folder"
+  file_names:
+    filename_raw: "raw_loanbook_123.csv"
+    filename_scenario_tms: "scenarios_2022_tms.csv"
+    filename_scenario_sda: "scenarios_2022_sda.csv"
+    filename_abcd: "abcd.csv"
+  project_parameters:
+    scenario_source: "weo_2022"
+    scenario_select: "nze_2050"
+    region_select: "global"
+    # normally the start year should correspond with year of the publication of
+    # the scenario in use
+    start_year_select: 2022
+    time_frame_select: 5
+    # regions must be available for the selected scenario
+    benchmark_regions_select: "global,european union"
+    remove_inactive_companies: TRUE
+  sector_split:
+    apply_sector_split: TRUE
+    sector_split_type: "equal_weights"
+    # sector_split_type: "worst_case"
+    dir_split_company_id: "path/to/split_folder"
+    filename_split_company_id: "split_company_ids.csv"
+    dir_advanced_company_indicators: "path/to/advanced_company_indicators_folder"
+    filename_advanced_company_indicators: "advanced_company_indicators.xlsx"
+  matching:
+    prep_input_level: "direct_loantaker"
+    params_match_name:
+      by_sector: TRUE
+      min_score: 0.9
+      method: "jw"
+      p: 0.1
+      overwrite: NULL
+      join_id: NULL
+    own_sector_classification:
+      use_own_sector_classification: FALSE
+      dir_own_sector_classification: "path/to/own_sector_classification_folder"
+      filename_own_sector_classification: "own_sector_classification.csv"
+
+
+
diff --git a/expected_columns.R b/expected_columns.R
new file mode 100644
index 00000000..daa66362
--- /dev/null
+++ b/expected_columns.R
@@ -0,0 +1,157 @@
+# expected columns region isos file
+col_types_region_isos <- readr::cols_only(
+  region = "c",
+  isos = "c",
+  source = "c"
+)
+col_select_region_isos <- names(col_types_region_isos[["cols"]])
+
+# expected columns tms scenario file
+col_types_scenario_tms <- readr::cols_only(
+  scenario_source = "c",
+  region = "c",
+  scenario = "c",
+  sector = "c",
+  technology = "c",
+  year = "i",
+  smsp = "n",
+  tmsr = "n"
+)
+col_select_scenario_tms <- names(col_types_scenario_tms[["cols"]])
+
+# expected columns sda scenario file
+col_types_scenario_sda <- readr::cols_only(
+  scenario_source = "c",
+  region = "c",
+  scenario = "c",
+  sector = "c",
+  year = "i",
+  emission_factor = "n",
+  emission_factor_unit = "c"
+)
+col_select_scenario_sda <- names(col_types_scenario_sda[["cols"]])
+
+# expected columns abcd file
+col_types_abcd <- readr::cols_only(
+  company_id = "i",
+  name_company = "c",
+  lei = "c",
+  is_ultimate_owner = "l",
+  sector = "c",
+  technology = "c",
+  plant_location = "c",
+  year = "i",
+  production = "n",
+  production_unit = "c",
+  emission_factor = "n",
+  emission_factor_unit = "c",
+  ald_timestamp = "c"
+)
+col_select_abcd <- names(col_types_abcd[["cols"]])
+
+# expected columns matched_prioritized_all_groups file
+col_types_matched_prio_all_groups <- readr::cols_only(
+  group_id = "c",
+  id_loan = "c",
+  id_direct_loantaker = "c",
+  name_direct_loantaker = "c",
+  id_intermediate_parent_1 = "c",
+  name_intermediate_parent_1 = "c",
+  id_ultimate_parent = "c",
+  name_ultimate_parent = "c",
+  loan_size_outstanding = "n",
+  loan_size_outstanding_currency = "c",
+  loan_size_credit_limit = "n",
+  loan_size_credit_limit_currency = "c",
+  sector_classification_system = "c",
+  sector_classification_input_type = "c",
+  sector_classification_direct_loantaker = "c",
+  fi_type = "c",
+  flag_project_finance_loan = "c",
+  name_project = "c",
+  lei_direct_loantaker = "c",
+  isin_direct_loantaker = "c",
+  id_2dii = "c",
+  level = "c",
+  sector = "c",
+  sector_abcd = "c",
+  name = "c",
+  name_abcd = "c",
+  score = "n",
+  source = "c",
+  borderline = "l"
+)
+col_select_matched_prio_all_groups <- names(col_types_matched_prio_all_groups[["cols"]])
+
+# expected columns matched_all_groups file
+col_types_matched_all_groups <- readr::cols_only(
+  group_id = "c",
+  id_loan = "c",
+  id_direct_loantaker = "c",
+  name_direct_loantaker = "c",
+  id_intermediate_parent_1 = "c",
+  name_intermediate_parent_1 = "c",
+  id_ultimate_parent = "c",
+  name_ultimate_parent = "c",
+  loan_size_outstanding = "n",
+  loan_size_outstanding_currency = "c",
+  loan_size_credit_limit = "n",
+  loan_size_credit_limit_currency = "c",
+  sector_classification_system = "c",
+  sector_classification_input_type = "c",
+  sector_classification_direct_loantaker = "c",
+  fi_type = "c",
+  flag_project_finance_loan = "c",
+  name_project = "c",
+  lei_direct_loantaker = "c",
+  isin_direct_loantaker = "c",
+  id_2dii = "c",
+  level = "c",
+  sector = "c",
+  sector_abcd = "c",
+  name = "c",
+  name_abcd = "c",
+  score = "n",
+  source = "c",
+  borderline = "l"
+)
+col_select_matched_all_groups <- names(col_types_matched_all_groups[["cols"]])
+
+# expected columns raw loan book file
+col_types_raw <- readr::cols(
+  id_loan = "c",
+  id_direct_loantaker = "c",
+  name_direct_loantaker = "c",
+  id_intermediate_parent_1 = "c",
+  name_intermediate_parent_1 = "c",
+  id_ultimate_parent = "c",
+  name_ultimate_parent = "c",
+  loan_size_outstanding = "n",
+  loan_size_outstanding_currency = "c",
+  loan_size_credit_limit = "n",
+  loan_size_credit_limit_currency = "c",
+  sector_classification_system = "c",
+  sector_classification_input_type = "c",
+  sector_classification_direct_loantaker = "c",
+  fi_type = "c",
+  flag_project_finance_loan = "c",
+  name_project = "c",
+  lei_direct_loantaker = "c",
+  isin_direct_loantaker = "c"
+)
+
+# expected columns companies_sector_split file
+col_types_companies_sector_split <- readr::cols_only(
+  company_id = "i",
+  sector = "c",
+  sector_split = "n"
+)
+col_select_companies_sector_split <- names(col_types_companies_sector_split[["cols"]])
+
+# expected columns companies_sector_split_worst_case file
+col_types_companies_sector_split_worst_case <- readr::cols_only(
+  name_company = "c",
+  sector = "c",
+  sector_split = "n"
+)
+col_select_companies_sector_split_worst_case <- names(col_types_companies_sector_split_worst_case[["cols"]])
diff --git a/run_matching.R b/run_matching.R
new file mode 100644
index 00000000..eb7d62b3
--- /dev/null
+++ b/run_matching.R
@@ -0,0 +1,200 @@
+# set up project and load packages----
+library(dplyr, warn.conflicts = FALSE)
+library(r2dii.data)
+library(r2dii.match)
+library(readr)
+library(withr)
+
+# source helpers----
+source("expected_columns.R")
+
+# load config----
+# TODO: all params to function signature
+config_dir <- config::get("directories")
+config_files <- config::get("file_names")
+
+dir_raw <- config_dir$dir_raw
+path_abcd <- file.path(config_dir$dir_abcd, config_files$filename_abcd)
+dir_matched <- config_dir$dir_matched
+
+config_matching <- config::get("matching")
+
+matching_by_sector <- config_matching$params_match_name$by_sector
+matching_min_score <- config_matching$params_match_name$min_score
+matching_method <- config_matching$params_match_name$method
+matching_p <- config_matching$params_match_name$p
+matching_overwrite <- config_matching$params_match_name$overwrite
+matching_join_id <- config_matching$params_match_name$join_id
+
+matching_use_own_sector_classification <- config_matching$own_sector_classification$use_own_sector_classification
+if (matching_use_own_sector_classification) {
+  dir_own_sector_classification <- config_matching$own_sector_classification$dir_own_sector_classification
+  filename_own_sector_classification <- config_matching$own_sector_classification$filename_own_sector_classification
+  path_own_sector_classification <- file.path(dir_own_sector_classification, filename_own_sector_classification)
+}
+
+# validate config values----
+if (!length(dir_raw) == 1) {
+  stop("Argument dir_raw must be of length 1. Please check your input.")
+}
+if (!inherits(dir_raw, "character")) {
+  stop("Argument dir_raw must be of class character. Please check your input.")
+}
+if (!length(path_abcd) == 1) {
+  stop("Argument path_abcd must be of length 1. Please check your input.")
+}
+if (!inherits(path_abcd, "character")) {
+  stop("Argument path_abcd must be of class character. Please check your input.")
+}
+if (!length(dir_matched) == 1) {
+  stop("Argument dir_matched must be of length 1. Please check your input.")
+}
+if (!inherits(dir_matched, "character")) {
+  stop("Argument dir_matched must be of class character. Please check your input.")
+}
+if (!length(matching_by_sector) == 1) {
+  stop("Argument matching_by_sector must be of length 1. Please check your input.")
+}
+if (!inherits(matching_by_sector, "logical")) {
+  stop("Argument matching_by_sector must be of class logical. Please check your input.")
+}
+if (!length(matching_min_score) == 1) {
+  stop("Argument matching_min_score must be of length 1. Please check your input.")
+}
+if (!inherits(matching_min_score, "numeric")) {
+  stop("Argument matching_min_score must be of class numeric. Please check your input.")
+}
+if (!length(matching_method) == 1) {
+  stop("Argument matching_method must be of length 1. Please check your input.")
+}
+if (!inherits(matching_method, "character")) {
+  stop("Argument matching_method must be of class character Please check your input.")
+}
+if (!length(matching_p) == 1) {
+  stop("Argument matching_p must be of length 1. Please check your input.")
+}
+if (!inherits(matching_p, "numeric")) {
+  stop("Argument matching_p must be of class numeric. Please check your input.")
+}
+# TODO: check for data.frame
+# if (!length(matching_overwrite) == 1) {
+#   stop("Argument matching_overwrite must be of length 1. Please check your input.")
+# }
+# if (!inherits(matching_overwrite, "numeric")) {
+#   stop("Argument matching_overwrite must be of class numeric. Please check your input.")
+# }
+# TODO: check for join_object
+# if (!length(matching_join_id) == 1) {
+#   stop("Argument matching_join_id must be of length 1. Please check your input.")
+# }
+# if (!inherits(matching_join_id, "numeric")) {
+#   stop("Argument matching_join_id must be of class numeric. Please check your input.")
+# }
+if (!length(matching_use_own_sector_classification) == 1) {
+  stop("Argument matching_use_own_sector_classification must be of length 1. Please check your input.")
+}
+if (!inherits(matching_use_own_sector_classification, "logical")) {
+  stop("Argument matching_use_own_sector_classification must be of class logical. Please check your input.")
+}
+# path to own sector classification onlz required if boolean TRUE
+if (matching_use_own_sector_classification) {
+  if (!length(path_own_sector_classification) == 1) {
+    stop("When matching_use_own_sector_classification == TRUE, argument path_own_sector_classification must be of length 1. Please check your input.")
+  }
+  if (!inherits(path_own_sector_classification, "character")) {
+    stop("When matching_use_own_sector_classification == TRUE, argument path_own_sector_classification must be of class character. Please check your input.")
+  }
+}
+
+# load data----
+
+## load abcd----
+if (!file.exists(path_abcd)) {
+  stop(glue::glue("No ABCD file found at path {path_abcd}. Please check your project setup!"))
+}
+
+abcd <- readr::read_csv(
+  file.path(path_abcd),
+  col_types = col_types_abcd,
+  col_select = dplyr::all_of(col_select_abcd)
+)
+
+## optionally load own classification system----
+if (matching_use_own_sector_classification) {
+  if (!file.exists(path_own_sector_classification)) {
+    stop(glue::glue("No sector classification file found at path {path_own_sector_classification}. Please check your project setup!"))
+  }
+
+  sector_classification_system <- readr::read_csv(
+    file = path_own_sector_classification,
+    col_types = readr::cols_only(
+      code_system = "c",
+      code = "c",
+      sector = "c",
+      borderline = "l"
+    )
+  ) %>%
+    dplyr::select(names(r2dii.data::sector_classifications))
+}
+
+## load raw loan books----
+list_raw <- list.files(dir_raw)[grepl(".csv", list.files(dir_raw))]
+
+if (length(list_raw) == 0) {
+  stop(glue::glue("No raw loan book csvs found in {dir_raw}. Please check your project setup!"))
+}
+
+raw_lbk <- vroom::vroom(
+  file = file.path(dir_raw, list_raw),
+  col_types = col_types_raw,
+  id = "group_id"
+) %>%
+  dplyr::mutate(
+    group_id = gsub(glue::glue("{dir_raw}/"), "", .data$group_id),
+    group_id = gsub(".csv", "", .data$group_id)
+  ) %>%
+  dplyr::group_split(.data$group_id)
+
+# match and save loan books----
+for (i in 1:length(raw_lbk)) {
+  group_name <- unique(raw_lbk[[i]]$group_id)
+
+  ## match data----
+  if (matching_use_own_sector_classification) {
+    withr::with_options(
+      new = list(r2dii.match.sector_classifications = sector_classification_system),
+      code = {
+        getOption("r2dii.match.sector_classifications")
+        matched_lbk_i <- r2dii.match::match_name(
+          loanbook = raw_lbk[[i]],
+          abcd = abcd,
+          by_sector = matching_by_sector,
+          min_score = matching_min_score,
+          method = matching_method,
+          p = matching_p,
+          overwrite = matching_overwrite,
+          join_id = matching_join_id
+          # TODO: allow surfacing the other match_name args
+        )
+      }
+    )
+  } else {
+    matched_lbk_i <- r2dii.match::match_name(
+      loanbook = raw_lbk[[i]],
+      abcd = abcd,
+      by_sector = matching_by_sector,
+      min_score = matching_min_score,
+      method = matching_method,
+      p = matching_p,
+      overwrite = matching_overwrite,
+      join_id = matching_join_id
+    )
+  }
+
+  ## write matched data to file----
+  matched_lbk_i %>%
+    readr::write_csv(
+      file = file.path(dir_matched, glue::glue("matched_lbk_{group_name}.csv")),
+      na = ""
+    )
+}