diff --git a/helper_functions.R b/helper_functions.R
new file mode 100644
index 00000000..1ee63dd6
--- /dev/null
+++ b/helper_functions.R
@@ -0,0 +1,69 @@
+#' Find sector-split companies that are missing from the ABCD data
+#'
+#' @param abcd Data frame with columns `company_id` and `name_company`.
+#' @param companies_sector_split Data frame with a `company_id` column.
+#' @return Rows of `companies_sector_split` without a `company_id` match in
+#'   `abcd`.
+lost_companies_sector_split <- function(abcd,
+                                        companies_sector_split) {
+  known_companies <- dplyr::distinct(
+    abcd, .data$company_id, .data$name_company
+  )
+
+  dplyr::anti_join(companies_sector_split, known_companies, by = "company_id")
+}
+
+#' Split matched loans across sectors using company-level sector shares
+#'
+#' Loans without a company/sector match in the split are dropped by the inner
+#' join; a warning reports any change in the number of unique `name_abcd`.
+#' @param data Matched loan book to split.
+#' @param abcd Data frame with columns `company_id` and `name_company`.
+#' @param companies_sector_split Sector shares by `company_id` and `sector`.
+#' @return `data` with `id_loan` suffixed by sector and loan sizes scaled.
+apply_sector_split_to_loans <- function(data,
+                                        abcd,
+                                        companies_sector_split) {
+  unique_companies_pre_split <- dplyr::distinct(data, .data$name_abcd)
+  abcd_id <- dplyr::distinct(abcd, .data$company_id, .data$name_company)
+
+  companies_sector_split <- companies_sector_split %>%
+    dplyr::left_join(abcd_id, by = "company_id") %>%
+    dplyr::select(-"company_id")
+
+  data <- data %>%
+    dplyr::inner_join(
+      companies_sector_split,
+      by = c("name_abcd" = "name_company", "sector_abcd" = "sector")
+    ) %>%
+    dplyr::mutate(
+      # renaming the loan_id is not conditional to avoid any chance of
+      # accidentally renaming a split loan to a loan_id that already exists
+      id_loan = paste(.data$id_loan, .data$sector_abcd, sep = "_"),
+      loan_size_outstanding = dplyr::if_else(
+        is.na(.data$sector_split),
+        .data$loan_size_outstanding,
+        .data$loan_size_outstanding * .data$sector_split
+      ),
+      loan_size_credit_limit = dplyr::if_else(
+        is.na(.data$sector_split),
+        .data$loan_size_credit_limit,
+        .data$loan_size_credit_limit * .data$sector_split
+      )
+    ) %>%
+    dplyr::select(-"sector_split")
+
+  unique_companies_post_split <- dplyr::distinct(data, .data$name_abcd)
+
+  if (nrow(unique_companies_pre_split) != nrow(unique_companies_post_split)) {
+    warning(
+      glue::glue(
+        "Applying the sector split has lead to changes in the number of unique
+        companies covered in the analysis. Prior to the split, there were
+        {nrow(unique_companies_pre_split)} unique companies. After the split,
+        there are {nrow(unique_companies_post_split)} unique companies."
+      )
+    )
+  }
+  data
+}
diff --git a/prepare_sector_split.R b/prepare_sector_split.R
new file mode 100644
index 00000000..0a834096
--- /dev/null
+++ b/prepare_sector_split.R
@@ -0,0 +1,412 @@
+# This script can be used to derive company-specific sector shares for companies
+# that are active in two or more of the in-scope PACTA sectors. There are
+# several ways to calculate sector splits. One option is to calculate
+# sector splits based on an equal weights approach, with an option
+# to use primary energy inputs to calculate the shares for PACTA energy sectors:
+# Coal, Oil & Gas, Power. Another option is to allocate the split entirely to
+# the worst performing sector, based on the aggregate alignment metric on the
+# company-sector level.
+# The initial sector split is calculated as an equal weights split based on the
+# number of in-scope sectors the company operates in. The energy-focused second
+# step calculates energy sector splits based on a common unit of economic
+# activity, million tons of oil equivalent (mtoe). A number of transformation
+# steps must be made to arrive at this common unit for all three energy sectors.
+# For the Oil & Gas and the Coal sectors, the transformation is straightforward,
+# as both are already given in terms of primary energy. This means a simple
+# conversion factor can be applied.
+# For Power Generation, the steps are more complicated. Power generation is
+# given in terms of MWh of electricity generated. This needs to be converted to
+# mtoe using a conversion factor as in the other cases. Additionally, we need to
+# consider that the process of generating electricity from a primary energy
+# input leads to losses of primary energy in terms of heat in some technologies.
+# The amount of primary energy lost to heat differs by technology and is
+# generally relevant in power generation through heat or combustion, but not in
+# other ways of generating electricity.
+# We therefore have to divide the electricity generated by an efficiency factor
+# specific to each technology to arrive at a primary energy equivalent for power
+# generation.
+# The output is a list of companies and sectors the companies operate in, for
+# which a sector share ratio is provided, based on the two steps outlined above.
+# The intended use case is to provide a rule by which to allocate loans to
+# multiple sectors, for loan books where such loan allocation is ambiguous.
+
+# load packages----
+library(dplyr, warn.conflicts = FALSE)
+library(janitor)
+library(readr)
+library(readxl)
+
+# source helpers----
+source("expected_columns.R")
+
+# load config----
+config_dir <- config::get("directories")
+config_files <- config::get("file_names")
+config_project_parameters <- config::get("project_parameters")
+config_prepare_sector_split <- config::get("sector_split")
+
+dir_matched <- config_dir$dir_matched
+# csv holding the company_ids that receive the primary energy based split
+path_sector_split <- file.path(
+  config_prepare_sector_split$dir_split_company_id,
+  config_prepare_sector_split$filename_split_company_id
+)
+# xlsx workbook holding the advanced company indicators
+path_advanced_company_indicators <- file.path(
+  config_prepare_sector_split$dir_advanced_company_indicators,
+  config_prepare_sector_split$filename_advanced_company_indicators
+)
+
+sheet_advanced_company_indicators <- config_prepare_sector_split$sheet_advanced_company_indicators
+# NOTE(review): config_files and time_frame are not used again in this script
+start_year <- config_project_parameters$start_year
+time_frame <- config_project_parameters$time_frame
+
+## load input data----
+advanced_company_indicators_raw <- readxl::read_xlsx(
+  path = path_advanced_company_indicators,
+  sheet = sheet_advanced_company_indicators
+)
+
+company_ids_primary_energy_split <- readr::read_csv(
+  path_sector_split,
+  col_types = readr::cols_only(company_id = "d"),
+  col_select = "company_id"
+) %>%
+  dplyr::pull(.data$company_id)
+
+# optional: remove inactive companies; all_of() selects by the names in cols_abcd
+if (config_project_parameters$remove_inactive_companies) {
+  abcd_removed_inactive_companies <- readr::read_csv(
+    file.path(config_dir$dir_abcd, "abcd_removed_inactive_companies.csv"),
+    col_select = dplyr::all_of(cols_abcd)
+  )
+}
+
+## auxiliary data sets----
+
+# Physical energy content and primary energy efficiency.
+# Power generation based on heat/combustion causes a loss of a share of primary
+# energy. Back-calculating the primary energy content from electricity metrics,
+# such as power generation, therefore requires dividing the power generation by
+# an efficiency factor to derive the physical energy input.
+# We apply such factors for fossil fuel based power generation only, since we
+# are interested in approximating the exposure to fossil fuels.
+# Values for efficiency of electricity production are taken from IEA "Energy
+# Efficiency Indicators for Public Electricity Production from Fossil Fuels" at
+# https://iea.blob.core.windows.net/assets/acaecb98-4430-4395-a4fa-d1a4d5ccb3d3/EnergyEfficiencyIndicatorsforPublicElectricityProductionfromFossilFuels.pdf
+# last accessed on 15 March, 2023.
+
+# efficiency factors by power technology; a factor of 1 means no adjustment
+primary_energy_efficiency <- dplyr::tibble(
+  region = "global",
+  sector = "power",
+  technology = c(
+    "coalcap", "gascap", "oilcap", "hydrocap", "nuclearcap", "renewablescap"
+  ),
+  primary_energy_efficiency_factor = c(0.343, 0.395, 0.365, 1, 1, 1)
+)
+
+# unit conversions are taken from: http://wds.iea.org/wds/pdf/WORLDBAL_Documentation.pdf
+# last accessed on 27 Feb 2023
+
+# factors converting one reported unit of each energy sector into mtoe
+unit_conversion <- dplyr::tibble(
+  sector = c("coal", "oil and gas", "power"),
+  unit = c("t coal", "GJ", "MWh"),
+  value_in_mtoe = c(7e-07, 2.3885e-08, 8.598e-08)
+)
+
+# calculate sector split----
+## wrangle input data----
+advanced_company_indicators <- advanced_company_indicators_raw %>%
+  janitor::clean_names() %>%
+  # to compare primary energy units, we need power generation, not power capacity
+  dplyr::filter(
+    (.data$asset_sector == "Power" & .data$activity_unit == "MWh") | .data$asset_sector != "Power"
+  ) %>%
+  # only the equity_ownership_* columns are used below; helpers are passed to
+  # select() directly because all_of() is meant for vectors of column names
+  dplyr::select(
+    -c(
+      dplyr::starts_with("direct_ownership_"),
+      dplyr::starts_with("financial_control_")
+    )
+  ) %>%
+  dplyr::rename_with(.fn = ~ gsub("asset_", "", .x)) %>%
+  tidyr::pivot_longer(
+    cols = dplyr::starts_with("equity_ownership_"),
+    names_to = "year",
+    names_prefix = "equity_ownership_",
+    values_to = "value",
+    values_ptypes = list("value" = numeric())
+  ) %>%
+  dplyr::mutate(year = as.numeric(.data$year)) %>%
+  dplyr::mutate(
+    sector = tolower(.data$sector),
+    sector = dplyr::case_when(
+      .data$sector == "oil&gas" ~ "oil and gas",
+      .data$sector == "ldv" ~ "automotive",
+      TRUE ~ .data$sector
+    ),
+    technology = dplyr::case_when(
+      .data$sector == "coal" ~ "coal",
+      .data$sector == "oil and gas" & grepl("Gas", .data$technology) ~ "gas",
+      .data$sector == "oil and gas" & grepl("Oil", .data$technology) ~ "oil",
+      .data$sector == "power" ~ tolower(.data$technology),
+      TRUE ~ tolower(.data$technology)
+    )
+  ) %>%
+  dplyr::filter(
+    !.data$sector %in% c("hdv", "shipping"),
+    !.data$activity_unit == "tkm"
+  ) %>%
+  dplyr::summarise(
+    value = sum(.data$value, na.rm = TRUE),
+    .by = c(
+      "company_id",
+      "company_name",
+      "sector",
+      "technology",
+      "activity_unit",
+      "year"
+    )
+  ) %>%
+  dplyr::rename(
+    name_company = "company_name",
+    production = "value",
+    production_unit = "activity_unit"
+  ) %>%
+  # we calculate the sector split based on the primary energy mix of the start year
+  dplyr::filter(.data$year == .env$start_year)
+
+# optional: drop all companies listed in abcd_removed_inactive_companies
+if (config_project_parameters$remove_inactive_companies) {
+  advanced_company_indicators <-
+    dplyr::anti_join(advanced_company_indicators, abcd_removed_inactive_companies, by = "company_id")
+}
+
+## determine sector splits by company----
+### count number of sectors and energy sectors per company----
+# every company/sector pair is counted once. n_sectors is the number of
+# sectors a company operates in, n_energy_sectors the number of those that
+# are energy sectors (coal, oil and gas, power)
+n_sectors_by_company <- advanced_company_indicators %>%
+  dplyr::distinct(
+    .data$company_id,
+    .data$sector
+  ) %>%
+  dplyr::summarise(
+    n_sectors = dplyr::n(),
+    # TRUE counts as 1, so the sum is the number of energy sectors
+    n_energy_sectors = sum(
+      .data$sector %in% c("coal", "oil and gas", "power"),
+      na.rm = TRUE
+    ),
+    .by = "company_id"
+  )
+
+### identify companies active in more than one energy sector----
+# these are the candidates for the primary energy based split
+companies_in_multiple_energy_sectors <- dplyr::pull(
+  dplyr::filter(n_sectors_by_company, .data$n_energy_sectors > 1),
+  .data$company_id
+)
+
+## calculate equal weights sector split for all sectors----
+# every sector a company operates in receives the same share, i.e. one over
+# the number of sectors of the company
+sector_split_all_companies <- advanced_company_indicators %>%
+  dplyr::filter(.data$year == .env$start_year) %>%
+  dplyr::inner_join(n_sectors_by_company, by = "company_id") %>%
+  dplyr::mutate(sector_split = 1 / .data$n_sectors) %>%
+  # collapse the technology level rows into one row per company, sector and
+  # production unit; the counts and the share are constant within a company,
+  # so max() simply carries them over
+  dplyr::summarise(
+    production = sum(.data$production, na.rm = TRUE),
+    n_sectors = max(.data$n_sectors, na.rm = TRUE),
+    n_energy_sectors = max(.data$n_energy_sectors, na.rm = TRUE),
+    sector_split = max(.data$sector_split, na.rm = TRUE),
+    .by = c("company_id", "name_company", "sector", "year", "production_unit")
+  )
+
+### check that the sum of the sector split of each company is 1----
+check_sector_split_all_companies <- dplyr::summarise(
+  sector_split_all_companies,
+  sum_share = sum(sector_split, na.rm = TRUE),
+  .by = "company_id"
+)
+
+# the sums are compared to 1 after rounding to three decimals.
+# NOTE(review): a company reporting one sector in several production units
+# has several rows for that sector, which makes its sum exceed 1
+if (!all(round(check_sector_split_all_companies$sum_share, 3) == 1)) {
+  stop("sector_split_all_companies contains companies for which the sum of the sector split deviates from 1")
+}
+
+## calculate primary energy-based sector split for energy sectors----
+# keep only companies that are active in multiple energy sectors
+sector_split_multi_energy_companies <- dplyr::filter(
+  advanced_company_indicators,
+  .data$company_id %in% .env$companies_in_multiple_energy_sectors,
+  .data$sector %in% c("coal", "oil and gas", "power"),
+  .data$year == .env$start_year
+)
+
+# adjust power generation by primary energy efficiency: dividing generation by
+# the efficiency factor of the technology yields the primary energy input.
+# power technologies without an efficiency factor are dropped by the inner join
+sector_split_multi_energy_companies_power <- sector_split_multi_energy_companies %>%
+  dplyr::filter(.data$sector == "power") %>%
+  dplyr::inner_join(
+    primary_energy_efficiency,
+    by = c("sector", "technology")
+  ) %>%
+  dplyr::mutate(
+    production = .data$production / .data$primary_energy_efficiency_factor
+  ) %>%
+  dplyr::select(-"primary_energy_efficiency_factor")
+
+# transform all energy sectors to common unit of energy: mtoe
+# afterwards, get the sector split for each multi energy sector company as the
+# share of each sector in the company's total production in mtoe
+sector_split_multi_energy_companies <- sector_split_multi_energy_companies %>%
+  dplyr::filter(.data$sector != "power") %>%
+  dplyr::bind_rows(sector_split_multi_energy_companies_power) %>%
+  # add up the technologies of each company and sector
+  dplyr::summarise(
+    production = sum(.data$production, na.rm = TRUE),
+    .by = c("company_id", "name_company", "sector", "year", "production_unit")
+  ) %>%
+  # sector/unit pairs without a conversion factor are dropped by the inner join
+  dplyr::inner_join(
+    unit_conversion,
+    by = c("sector", "production_unit" = "unit")
+  ) %>%
+  dplyr::mutate(
+    production = .data$production * .data$value_in_mtoe,
+    production_unit = "mtoe"
+  ) %>%
+  dplyr::select(-"value_in_mtoe") %>%
+  # production_unit is "mtoe" for every row at this point, so the shares are
+  # effectively calculated per company and year
+  dplyr::mutate(
+    sector_split = .data$production / sum(.data$production, na.rm = TRUE),
+    .by = c("company_id", "name_company", "year", "production_unit")
+  )
+
+# wrangle: reduce to the columns that are used for the combined split and the
+# output files
+sector_split_multi_energy_companies <- sector_split_multi_energy_companies %>%
+  dplyr::select(
+    "company_id",
+    "name_company",
+    "sector",
+    "production_unit",
+    "production",
+    "sector_split"
+  ) %>%
+  # keep only companies that are provided in input company list. whole
+  # companies are removed here, so the shares of the remaining companies
+  # still add up to 1
+  dplyr::filter(.data$company_id %in% company_ids_primary_energy_split)
+
+### check that the sum of the primary energy based sector split of each company is 1----
+check_sector_split_multi_energy_companies <- dplyr::summarise(
+  sector_split_multi_energy_companies,
+  sum_share = sum(sector_split, na.rm = TRUE),
+  .by = "company_id"
+)
+
+# the sums are compared to 1 after rounding to three decimals. a company whose
+# energy production adds up to zero has NaN shares, which sum to 0 with
+# na.rm = TRUE, so it fails the check as well
+if (!all(round(check_sector_split_multi_energy_companies$sum_share, 3) == 1)) {
+  stop("sector_split_multi_energy_companies contains companies for which the sum of the sector split deviates from 1")
+}
+
+## combine the sector splits----
+# Rows without a primary energy based split keep the equal weights split. For
+# the other rows, the primary energy based split is scaled to the equal
+# weights share of the energy sectors (n_energy_sectors / n_sectors), so that
+# the exposure to non-energy sectors is not lost.
+sector_split_all_companies_final <- sector_split_all_companies %>%
+  dplyr::left_join(
+    sector_split_multi_energy_companies,
+    by = c("company_id", "name_company", "sector"),
+    suffix = c("_all", "_energy")
+  ) %>%
+  dplyr::mutate(
+    sector_split_energy_scaled = (.data$n_energy_sectors / .data$n_sectors) * .data$sector_split_energy,
+    sector_split = dplyr::if_else(
+      !is.na(.data$sector_split_energy),
+      .data$sector_split_energy_scaled,
+      .data$sector_split_all
+    )
+  ) %>%
+  dplyr::rename(
+    production = "production_all",
+    production_unit = "production_unit_all"
+  )
+
+### check that the sum of the combined sector split of each company is 1----
+check_sector_split_all_companies_final <- dplyr::summarise(
+  sector_split_all_companies_final,
+  sum_share = sum(.data$sector_split, na.rm = TRUE),
+  .by = "company_id"
+)
+
+if (!all(round(check_sector_split_all_companies_final$sum_share, 3) == 1)) {
+  stop("sector_split_all_companies_final contains companies for which the sum of the sector split deviates from 1")
+}
+
+
+## write output----
+# primary energy based split of the multi energy sector companies that are
+# part of the input company list
+sector_split_multi_energy_companies %>%
+  dplyr::select(
+    c(
+      "company_id",
+      "name_company",
+      "sector",
+      "sector_split"
+    )
+  ) %>%
+  readr::write_csv(
+    file = file.path(dir_matched, "companies_sector_split_primary_energy_only.csv"),
+    na = ""
+  )
+
+# equal weights split of all companies, without the primary energy based
+# adjustment of the energy sectors
+sector_split_all_companies %>%
+  dplyr::select(
+    c(
+      "company_id",
+      "name_company",
+      "sector",
+      "sector_split"
+    )
+  ) %>%
+  readr::write_csv(
+    file = file.path(dir_matched, "companies_sector_split_equal_weights_only.csv"),
+    na = ""
+  )
+
+# combined split: equal weights, with the scaled primary energy based split
+# used for the rows where it is available
+sector_split_all_companies_final %>%
+  dplyr::select(
+    c(
+      "company_id",
+      "name_company",
+      "sector",
+      "sector_split"
+    )
+  ) %>%
+  readr::write_csv(
+    file = file.path(dir_matched, "companies_sector_split.csv"),
+    na = ""
+  )
diff --git a/run_match_prioritize.R b/run_match_prioritize.R
index bb267468..49ad59d6 100644
--- a/run_match_prioritize.R
+++ b/run_match_prioritize.R
@@ -5,14 +5,23 @@ library(readr)
 
 # source helpers----
 source("expected_columns.R")
+source("helper_functions.R")
 
 # load config----
 config_dir <- config::get("directories")
+config_files <- config::get("file_names")
+config_match_prio <- config::get("match_prioritize")
+config_prepare_sector_split <- config::get("sector_split")
+
 dir_matched <- config_dir$dir_matched
+path_abcd <- file.path(config_dir$dir_abcd, config_files$filename_abcd) # abcd workbook
+sheet_abcd <- config_files$sheet_abcd # sheet of the workbook that is read
 
-config_match_prio <- config::get("match_prioritize")
 match_prio_priority <- config_match_prio$priority
 
+apply_sector_split <- config_prepare_sector_split$apply_sector_split # optional step on/off
+sector_split_type_select <- config_prepare_sector_split$sector_split_type # e.g. "equal_weights"
+
 # validate config values----
 if (!length(dir_matched) == 1) {
   stop("Argument dir_matched must be of length 1. Please check your input.")
@@ -36,8 +45,9 @@ if (!is.null(match_prio_priority)) {
   }
 }
 
-# load manually matched files----
-list_matched_manual <- list.files(dir_matched)[grepl("^matched_lbk_.*_manual.csv$", list.files(dir_matched))]
+# load data----
+## load manually matched files (matched_lbk_*_manual.csv in dir_matched)----
+list_matched_manual <- list.files(path = dir_matched, pattern = "^matched_lbk_.*_manual[.]csv$")
 
 if (length(list_matched_manual) == 0) {
   stop(glue::glue("No manually matched loan book csvs found in {dir_matched}. Please check your project setup!"))
@@ -45,11 +55,44 @@ if (length(list_matched_manual) == 0) {
 
 matched_lbk_manual <- readr::read_csv(
   file = file.path(dir_matched, list_matched_manual),
-  col_types = col_types_matched_manual#,
-  # col_select = dplyr::all_of(col_select_matched_manual)
+  col_types = col_types_matched_manual
 ) %>%
   dplyr::group_split(.data$group_id)
 
+## optional: load sector split----
+if (apply_sector_split && sector_split_type_select == "equal_weights") {
+  companies_sector_split <- readr::read_csv(
+    file.path(dir_matched, "companies_sector_split.csv"),
+    col_types = col_types_companies_sector_split,
+    col_select = dplyr::all_of(col_select_companies_sector_split)
+  )
+
+  # TODO: better use prepared abcd?
+  abcd <- readxl::read_xlsx(path = path_abcd, sheet = sheet_abcd)
+
+  # validate first: all_of() below would otherwise error on a missing column
+  if (!all(cols_abcd %in% names(abcd))) {
+    stop("Columns in abcd do not match expected input names. Please check your input.")
+  }
+  # keep the expected columns and coerce each one to its expected type
+  abcd <- abcd %>%
+    dplyr::select(dplyr::all_of(cols_abcd)) %>%
+    dplyr::mutate(
+      company_id = as.numeric(.data$company_id),
+      name_company = as.character(.data$name_company),
+      lei = as.character(.data$lei),
+      is_ultimate_owner = as.logical(.data$is_ultimate_owner),
+      sector = as.character(.data$sector),
+      technology = as.character(.data$technology),
+      plant_location = as.character(.data$plant_location),
+      year = as.integer(.data$year),
+      production = as.numeric(.data$production),
+      production_unit = as.character(.data$production_unit),
+      emission_factor = as.numeric(.data$emission_factor),
+      emission_factor_unit = as.character(.data$emission_factor_unit)
+    )
+}
+
 # prioritize and save files----
 for (i in 1:length(matched_lbk_manual)) {
   group_name <- unique(matched_lbk_manual[[i]]$group_id)
@@ -59,6 +102,15 @@ for (i in 1:length(matched_lbk_manual)) {
     r2dii.match::prioritize(priority = match_prio_priority) %>%
     dplyr::mutate(group_id = .env$group_name)
 
+  # optional: apply sector split to the prioritized loans of this group----
+  if (apply_sector_split && sector_split_type_select == "equal_weights") {
+    matched_prio_i <- matched_prio_i %>%
+      apply_sector_split_to_loans(
+        abcd = abcd,
+        companies_sector_split = companies_sector_split
+      )
+  }
+
   ## write matched prioritized loan book to file----
   matched_prio_i %>%
     readr::write_csv(
@@ -66,3 +118,17 @@ for (i in 1:length(matched_lbk_manual)) {
       na = ""
     )
 }
+
+# optional: report sector split companies that are missing from abcd----
+if (apply_sector_split && sector_split_type_select == "equal_weights") {
+  # NOTE(review): the result overwrites the helper function of the same name
+  lost_companies_sector_split <- lost_companies_sector_split(
+    abcd = abcd,
+    companies_sector_split = companies_sector_split
+  )
+  lost_companies_sector_split %>%
+    readr::write_csv(
+      file = file.path(dir_matched, "lost_companies_sector_split.csv"),
+      na = ""
+    )
+}