From b741dbb9126fcba172cfc832b19bb241182b67db Mon Sep 17 00:00:00 2001 From: Zachary Schmidt Date: Tue, 29 Oct 2024 20:31:10 -0700 Subject: [PATCH 1/9] Add kaya_variables, kaya_factors, and kaya_lmdi methods to the compute module. Also add the kaya subdirectory that contains the implementation for the kaya methods. (#875) --- AUTHORS.rst | 1 + RELEASE_NOTES.md | 1 + pyam/compute.py | 244 ++++++++++++++++++ pyam/kaya/input_variable_names.py | 17 ++ pyam/kaya/kaya_factor_names.py | 6 + pyam/kaya/kaya_factors.py | 89 +++++++ pyam/kaya/kaya_variable_names.py | 3 + pyam/kaya/kaya_variables.py | 134 ++++++++++ pyam/kaya/lmdi.py | 362 +++++++++++++++++++++++++++ pyam/kaya/lmdi_names.py | 7 + tests/test_feature_kaya_factors.py | 124 +++++++++ tests/test_feature_kaya_lmdi.py | 157 ++++++++++++ tests/test_feature_kaya_variables.py | 119 +++++++++ 13 files changed, 1264 insertions(+) create mode 100644 pyam/kaya/input_variable_names.py create mode 100644 pyam/kaya/kaya_factor_names.py create mode 100644 pyam/kaya/kaya_factors.py create mode 100644 pyam/kaya/kaya_variable_names.py create mode 100644 pyam/kaya/kaya_variables.py create mode 100644 pyam/kaya/lmdi.py create mode 100644 pyam/kaya/lmdi_names.py create mode 100644 tests/test_feature_kaya_factors.py create mode 100644 tests/test_feature_kaya_lmdi.py create mode 100644 tests/test_feature_kaya_variables.py diff --git a/AUTHORS.rst b/AUTHORS.rst index c5e1b039d..ad155b86e 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -23,6 +23,7 @@ The following persons contributed to the development of the |pyam| package: - Pietro Monticone `@pitmonticone `_ - Edward Byers `@byersiiasa `_ - Fridolin Glatter `@glatterf42 `_ +- Zachary Schmidt `@zacharyschmidt `_ | The core maintenance of the |pyam| package is done by the *Scenario Services & Scientific Software* research theme diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 8d098b911..c3a261424 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,6 @@ # Next release +- 
[#875](https://github.com/IAMconsortium/pyam/pull/875) Add methods to the `compute` module implementing Kaya decomposition analysis. - [#880](https://github.com/IAMconsortium/pyam/pull/880) Use `pd.Series.iloc[pos]` for forward-compatibility - [#877](https://github.com/IAMconsortium/pyam/pull/xxx) Support `engine` and other `pd.ExcelFile` keywords. diff --git a/pyam/compute.py b/pyam/compute.py index f2c247eb0..aaf979277 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -4,8 +4,10 @@ import pandas as pd import wquantiles +import pyam from pyam._debiasing import _compute_bias from pyam.index import replace_index_values +from pyam.kaya import kaya_factors, kaya_variables, lmdi from pyam.timeseries import growth_rate from pyam.utils import remove_from_list @@ -249,6 +251,248 @@ def bias(self, name, method, axis): """ _compute_bias(self._df, name, method, axis) + def kaya_variables(self, scenarios, append=False): + """Compute the variables needed to compute Kaya factors + for the Kaya Decomposition Analysis. + + Parameters + ---------- + scenarios : iterable of tuples (model, scenario, region) + The (model, scenario, region) combinations to be included. + append : bool, optional + Whether to append computed timeseries data to this instance. + + Returns + ------- + :class:`IamDataFrame` or **None** + Computed timeseries data or None if `append=True`. + + Notes + ----- + + Example of calling the method: + + .. code-block:: python + + df.compute.kaya_variables(scenarios=[("model_a", "scenario_a", "region_a"), + ("model_b", "scenario_b", "region_b")], + append=True) + + The IamDataFrame must contain the following variables, otherwise the method + will return None: + .. 
list-table:: + - Required Variables + - Population + - GDP (MER or PPP) + - Final Energy + - Primary Energy + - Primary Energy|Coal + - Primary Energy|Oil + - Primary Energy|Gas + - Emissions|CO2|Industrial Processes + - Emissions|CO2|Carbon Capture and Storage + - Emissions|CO2|Carbon Capture and Storage|Biomass + - Emissions|CO2|Fossil Fuels and Industry + - Emissions|CO2|AFOLU + - Carbon Sequestration|CCS|Fossil|Energy + - Carbon Sequestration|CCS|Fossil|Industrial Processes + - Carbon Sequestration|CCS|Biomass|Energy + - Carbon Sequestration|CCS|Biomass|Industrial Processes + + """ + valid_scenarios = _validate_kaya_scenario_args(scenarios=scenarios) + if valid_scenarios is None: + return None + kaya_variables_frame = kaya_variables.kaya_variables(self._df, valid_scenarios) + if kaya_variables_frame is None: + return None + if append: + self._df.append( + _find_non_duplicate_rows(self._df, kaya_variables_frame), inplace=True + ) + + return kaya_variables_frame + + def kaya_factors(self, scenarios, append=False): + """Compute the Kaya factors needed to compute factors + for the Kaya Decomposition Analysis. + + Parameters + ---------- + scenarios : iterable of tuples (model, scenario, region) + The (model, scenario, region) combinations to be included. + append : bool, optional + Whether to append computed timeseries data to this instance. + + Returns + ------- + :class:`IamDataFrame` or **None** + Computed timeseries data or None if `append=True`. + + Notes + ----- + + Example of calling the method: + + .. code-block:: python + + df.compute.kaya_factors(scenarios=[("model_a", "scenario_a", "region_a"), + ("model_b", "scenario_b", "region_b")], + append=True) + + The IamDataFrame must contain the following variables, otherwise the method + will return None: + .. 
list-table:: + - Required Variables + - Population + - GDP (MER or PPP) + - Final Energy + - Primary Energy + - Primary Energy|Coal + - Primary Energy|Oil + - Primary Energy|Gas + - Emissions|CO2|Industrial Processes + - Emissions|CO2|Carbon Capture and Storage + - Emissions|CO2|Carbon Capture and Storage|Biomass + - Emissions|CO2|Fossil Fuels and Industry + - Emissions|CO2|AFOLU + - Carbon Sequestration|CCS|Fossil|Energy + - Carbon Sequestration|CCS|Fossil|Industrial Processes + - Carbon Sequestration|CCS|Biomass|Energy + - Carbon Sequestration|CCS|Biomass|Industrial Processes + + """ + valid_scenarios = _validate_kaya_scenario_args(scenarios=scenarios) + if valid_scenarios is None: + return None + kaya_variables = self.kaya_variables(valid_scenarios, append=False) + if kaya_variables is None: + return None + kaya_factors_frame = kaya_factors.kaya_factors(kaya_variables, valid_scenarios) + if kaya_factors_frame is None: + return None + if append: + self._df.append( + _find_non_duplicate_rows(self._df, kaya_factors_frame), inplace=True + ) + return kaya_factors_frame + + def kaya_lmdi(self, ref_scenario, int_scenario, append=False): + """Calculate the logarithmic mean Divisia index (LMDI) decomposition + using Kaya factors. + + Parameters + ---------- + ref_scenario : tuple of strings (model, scenario, region) + The (model, scenario, region) to be used as the reference scenario + in the LMDI calculation. + int_scenario : tuple of strings (model, scenario, region) + The (model, scenario, region) to be used as the intervention scenario + in the LMDI calculation. + append : bool, optional + Whether to append computed timeseries data to this instance. + + Returns + ------- + :class:`IamDataFrame` or **None** + Computed timeseries data or None if `append=True`. + + Notes + ----- + + Example of calling the method: + + .. 
code-block:: python + + df.compute.kaya_lmdi(ref_scenario=("model_a", "scenario_a", "region_a"), + int_scenario=("model_b", "scenario_b", "region_b"), + append=True) + + The IamDataFrame must contain the following variables, otherwise the method + will return None: + .. list-table:: + - Required Variables + - Population + - GDP (MER or PPP) + - Final Energy + - Primary Energy + - Primary Energy|Coal + - Primary Energy|Oil + - Primary Energy|Gas + - Emissions|CO2|Industrial Processes + - Emissions|CO2|Carbon Capture and Storage + - Emissions|CO2|Carbon Capture and Storage|Biomass + - Emissions|CO2|Fossil Fuels and Industry + - Emissions|CO2|AFOLU + - Carbon Sequestration|CCS|Fossil|Energy + - Carbon Sequestration|CCS|Fossil|Industrial Processes + - Carbon Sequestration|CCS|Biomass|Energy + - Carbon Sequestration|CCS|Biomass|Industrial Processes + + The model, scenario, and region fields for the results dataframe will be + concatenated values from the reference and intervention scenarios in the + form reference_scenario_value::intervention_scenario_value. 
+ + Example results data: + + model scenario region variable unit year value + model_a::model_a scen_a::scen_b World::World FE/GNP (LMDI) unknown 2010 1.321788 + model_a::model_a scen_a::scen_b World::World GNP/P (LMDI) unknown 2010 0.000000 + model_a::model_a scen_a::scen_b World::World PEDEq/FE (LMDI) unknown 2010 0.816780 + model_a::model_a scen_a::scen_b World::World PEFF/PEDEq (LMDI) unknown 2010 0.000000 + model_a::model_a scen_a::scen_b World::World Population (LMDI) unknown 2010 0.000000 + model_a::model_a scen_a::scen_b World::World TFC/PEFF (LMDI) unknown 2010 4.853221 + + """ + valid_ref_and_int_scenarios = _validate_kaya_scenario_args( + scenarios=[ref_scenario, int_scenario] + ) + # we must have two different scenarios to calculate kaya_lmdi + if (valid_ref_and_int_scenarios is None) or ( + len(valid_ref_and_int_scenarios) != 2 + ): + return None + kaya_factors = self.kaya_factors(valid_ref_and_int_scenarios, append=False) + if kaya_factors is None: + return None + kaya_lmdi_frame = lmdi.corrected_lmdi(kaya_factors, ref_scenario, int_scenario) + if kaya_lmdi_frame is None: + return None + if append: + self._df.append( + _find_non_duplicate_rows(self._df, kaya_lmdi_frame), inplace=True + ) + return kaya_lmdi_frame + + +def _validate_kaya_scenario_args(scenarios): + validated_scenarios = [] + for scenario in scenarios: + if (len(scenario) == 3) and _kaya_args_are_strings(scenario): + validated_scenarios.append(scenario) + # don't recalculate for identical scenarios + unique_scenarios = set(scenarios) + if len(unique_scenarios) == 0: + return None + return validated_scenarios + + +def _kaya_args_are_strings(scenario): + for arg in scenario: + if not isinstance(arg, str): + return False + return True + + +def _find_non_duplicate_rows(original_df, variables_to_add): + variables_for_append = pyam.IamDataFrame( + variables_to_add.as_pandas(meta_cols=False) + .merge(original_df.as_pandas(meta_cols=False), how="left", indicator=True) + 
.query('_merge=="left_only"') + .drop(columns="_merge") + ) + return variables_for_append + def _compute_learning_rate(x, performance, experience): """Internal implementation for computing implicit learning rate from timeseries data diff --git a/pyam/kaya/input_variable_names.py b/pyam/kaya/input_variable_names.py new file mode 100644 index 000000000..30e138e25 --- /dev/null +++ b/pyam/kaya/input_variable_names.py @@ -0,0 +1,17 @@ +POPULATION = "Population" +GDP_MER = "GDP|MER" +GDP_PPP = "GDP|PPP" +FINAL_ENERGY = "Final Energy" +PRIMARY_ENERGY = "Primary Energy" +PRIMARY_ENERGY_COAL = "Primary Energy|Coal" +PRIMARY_ENERGY_OIL = "Primary Energy|Oil" +PRIMARY_ENERGY_GAS = "Primary Energy|Gas" +EMISSIONS_CO2_INDUSTRIAL_PROCESSES = "Emissions|CO2|Industrial Processes" +EMISSIONS_CO2_CCS = "Emissions|CO2|Carbon Capture and Storage" +EMISSIONS_CO2_CCS_BIOMASS = "Emissions|CO2|Carbon Capture and Storage|Biomass" +EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY = "Emissions|CO2|Fossil Fuels and Industry" +EMISSIONS_CO2_AFOLU = "Emissions|CO2|AFOLU" +CCS_FOSSIL_ENERGY = "Carbon Sequestration|CCS|Fossil|Energy" +CCS_FOSSIL_INDUSTRY = "Carbon Sequestration|CCS|Fossil|Industrial Processes" +CCS_BIOMASS_ENERGY = "Carbon Sequestration|CCS|Biomass|Energy" +CCS_BIOMASS_INDUSTRY = "Carbon Sequestration|CCS|Biomass|Industrial Processes" diff --git a/pyam/kaya/kaya_factor_names.py b/pyam/kaya/kaya_factor_names.py new file mode 100644 index 000000000..664700ca8 --- /dev/null +++ b/pyam/kaya/kaya_factor_names.py @@ -0,0 +1,6 @@ +GNP_per_P = "GNP/P" +FE_per_GNP = "FE/GNP" +PEdeq_per_FE = "PEDEq/FE" +PEFF_per_PEDEq = "PEFF/PEDEq" +TFC_per_PEFF = "TFC/PEFF" +NFC_per_TFC = "NFC/TFC" diff --git a/pyam/kaya/kaya_factors.py b/pyam/kaya/kaya_factors.py new file mode 100644 index 000000000..bc0b2bb27 --- /dev/null +++ b/pyam/kaya/kaya_factors.py @@ -0,0 +1,89 @@ +from functools import reduce + +from pyam.kaya import input_variable_names, kaya_factor_names, kaya_variable_names + + +def 
kaya_factors(kaya_variables_frame, scenarios): + kaya_factors_frames = [] + for scenario in scenarios: + input = kaya_variables_frame.filter( + model=scenario[0], scenario=scenario[1], region=scenario[2] + ) + if input.empty: + break + kaya_factors_frames.append(_calc_gnp_per_p(input)) + kaya_factors_frames.append(_calc_fe_per_gnp(input)) + kaya_factors_frames.append(_calc_pedeq_per_fe(input)) + kaya_factors_frames.append(_calc_peff_per_pedeq(input)) + kaya_factors_frames.append(_calc_tfc_per_peff(input)) + kaya_factors_frames.append(_calc_nfc_per_tfc(input)) + kaya_factors_frames.append( + input.filter( + variable=[kaya_variable_names.TFC, input_variable_names.POPULATION] + ) + ) + if len(kaya_factors_frames) == 0: + return None + return reduce(lambda x, y: x.append(y), kaya_factors_frames) + + +def _calc_gnp_per_p(input_data): + variable = input_variable_names.GDP_PPP + if input_data.filter(variable=variable).empty: + variable = input_variable_names.GDP_MER + return input_data.divide( + variable, + input_variable_names.POPULATION, + kaya_factor_names.GNP_per_P, + append=False, + ) + + +def _calc_fe_per_gnp(input_data): + variable = input_variable_names.GDP_PPP + if input_data.filter(variable=variable).empty: + variable = input_variable_names.GDP_MER + return input_data.divide( + input_variable_names.FINAL_ENERGY, + variable, + kaya_factor_names.FE_per_GNP, + append=False, + ) + + +def _calc_pedeq_per_fe(input_data): + return input_data.divide( + input_variable_names.PRIMARY_ENERGY, + input_variable_names.FINAL_ENERGY, + kaya_factor_names.PEdeq_per_FE, + append=False, + ) + + +def _calc_peff_per_pedeq(input_data): + return input_data.divide( + kaya_variable_names.PRIMARY_ENERGY_FF, + input_variable_names.PRIMARY_ENERGY, + kaya_factor_names.PEFF_per_PEDEq, + append=False, + ) + + +def _calc_tfc_per_peff(input_data): + return input_data.divide( + kaya_variable_names.TFC, + kaya_variable_names.PRIMARY_ENERGY_FF, + kaya_factor_names.TFC_per_PEFF, + ignore_units="Mt 
CO2/EJ", + append=False, + ) + + +def _calc_nfc_per_tfc(input_data): + return input_data.divide( + kaya_variable_names.NFC, + kaya_variable_names.TFC, + kaya_factor_names.NFC_per_TFC, + ignore_units=True, + append=False, + ).rename(unit={"unknown": ""}) diff --git a/pyam/kaya/kaya_variable_names.py b/pyam/kaya/kaya_variable_names.py new file mode 100644 index 000000000..31efa86d5 --- /dev/null +++ b/pyam/kaya/kaya_variable_names.py @@ -0,0 +1,3 @@ +PRIMARY_ENERGY_FF = "Primary Energy|Fossil" +TFC = "Total Fossil Carbon" +NFC = "Net Fossil Carbon" diff --git a/pyam/kaya/kaya_variables.py b/pyam/kaya/kaya_variables.py new file mode 100644 index 000000000..2ee4fa410 --- /dev/null +++ b/pyam/kaya/kaya_variables.py @@ -0,0 +1,134 @@ +import logging +from functools import reduce + +from pyam.kaya import input_variable_names, kaya_variable_names + +logger = logging.getLogger(__name__) + +input_variable_list = [ + vars(input_variable_names)[variable_name] + for variable_name in dir(input_variable_names) + if not variable_name.startswith("__") +] + + +def kaya_variables(input_data, scenarios): + # copy data so we don't create side effects + # in particular, require_data will change the "exclude" series + input_data = input_data.copy() + validated_input_data = _validate_input_data(input_data) + if validated_input_data.empty: + return None + kaya_variable_frames = [] + for scenario in scenarios: + input = validated_input_data.filter( + model=scenario[0], scenario=scenario[1], region=scenario[2] + ) + if input.empty: + break + kaya_variable_frames.append(_calc_pop(input)) + kaya_variable_frames.append(_calc_gdp(input)) + kaya_variable_frames.append(_calc_fe(input)) + kaya_variable_frames.append(_calc_pe(input)) + kaya_variable_frames.append(_calc_pe_ff(input)) + kaya_variable_frames.append(_calc_tfc(input)) + kaya_variable_frames.append(_calc_nfc(input)) + + if len(kaya_variable_frames) == 0: + return None + # append all the IamDataFrames into one + return reduce(lambda x, y: 
x.append(y), kaya_variable_frames) + + +def _validate_input_data(input_data): + missing_variables = input_data.require_data( + variable=input_variable_list, exclude_on_fail=True + ) + if missing_variables is not None: + logger.info( + f"These variables are missing from the \ + scenarios in input_data:\n{missing_variables}" + ) + return input_data.filter(exclude=False) + + +def _calc_pop(input_data): + return input_data.filter(variable=input_variable_names.POPULATION) + + +def _calc_gdp(input_data): + variable = input_variable_names.GDP_PPP + if input_data.filter(variable=variable).empty: + variable = input_variable_names.GDP_MER + return input_data.filter(variable=variable) + + +def _calc_fe(input_data): + return input_data.filter(variable=input_variable_names.FINAL_ENERGY) + + +def _calc_pe(input_data): + return input_data.filter(variable=input_variable_names.PRIMARY_ENERGY) + + +def _calc_pe_ff(input_data): + input_data = input_data.copy() + input_data.add( + input_variable_names.PRIMARY_ENERGY_COAL, + input_variable_names.PRIMARY_ENERGY_OIL, + "pe_coal_oil", + append=True, + ) + return input_data.add( + input_variable_names.PRIMARY_ENERGY_GAS, + "pe_coal_oil", + kaya_variable_names.PRIMARY_ENERGY_FF, + ) + + +def _calc_nfc(input_data): + input_data = input_data.copy() + input_data.subtract( + input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, + input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, + "net_energy_emissions_with_biomass_ccs", + ignore_units="Mt CO2/yr", + append=True, + ) + return input_data.add( + input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, + "net_energy_emissions_with_biomass_ccs", + kaya_variable_names.NFC, + ignore_units="Mt CO2/yr", + append=False, + ) + + +def _calc_tfc(input_data): + input_data = input_data.copy() + ccs_fossil_energy = _calc_ccs_fossil_energy(input_data) + nfc = _calc_nfc(input_data) + nfc_with_ccs_fossil_energy = nfc.append(ccs_fossil_energy) + return nfc_with_ccs_fossil_energy.add( + "ccs_fossil_energy", 
+ kaya_variable_names.NFC, + kaya_variable_names.TFC, + ignore_units="Mt CO2/yr", + ) + + +def _calc_ccs_fossil_energy(input_data): + input_data = input_data.copy() + input_data.subtract( + input_variable_names.EMISSIONS_CO2_CCS, + input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, + "ccs_fossil", + ignore_units="Mt CO2/yr", + append=True, + ) + return input_data.subtract( + "ccs_fossil", + input_variable_names.CCS_FOSSIL_INDUSTRY, + "ccs_fossil_energy", + ignore_units="Mt CO2/yr", + ) diff --git a/pyam/kaya/lmdi.py b/pyam/kaya/lmdi.py new file mode 100644 index 000000000..62f89ceda --- /dev/null +++ b/pyam/kaya/lmdi.py @@ -0,0 +1,362 @@ +from functools import reduce + +import numpy as np +import pandas as pd + +import pyam +from pyam.kaya import ( + input_variable_names, + kaya_factor_names, + kaya_variable_names, + lmdi_names, +) + + +def corrected_lmdi(kaya_factors, ref_scenario, int_scenario): + ref_input = ( + kaya_factors.filter( + model=ref_scenario[0], scenario=ref_scenario[1], region=ref_scenario[2] + ) + .as_pandas() + .assign(scenario_class="reference") + ) + int_input = ( + kaya_factors.filter( + model=int_scenario[0], scenario=int_scenario[1], region=int_scenario[2] + ) + .as_pandas() + .assign(scenario_class="intervention") + ) + input = pyam.IamDataFrame(pd.concat([ref_input, int_input])) + + uncorrected = _uncorrected_lmdi(input) + non_neg = _lmdi_non_neg(uncorrected) + total_non_neg = _sum_lmdi_non_neg(non_neg) + total_w_neg = _tfc_diff(input) + difference = total_non_neg.append(total_w_neg).subtract( + "total_no_neg", "tfc_diff", "difference", append=False, ignore_units=True + ) + + lmdi_frames = [] + p_percent = _calc_percent_of_total_for_one_term( + non_neg, lmdi_names.Pop_LMDI, total_non_neg + ) + p_correction = p_percent.append(difference).multiply( + lmdi_names.Pop_LMDI, "difference", "correction", ignore_units=True + ) + p_corrected = p_correction.append(non_neg).add( + lmdi_names.Pop_LMDI, "correction", lmdi_names.Pop_LMDI, ignore_units=True + 
) + lmdi_frames.append(p_corrected) + + gnp_per_p_percent = _calc_percent_of_total_for_one_term( + non_neg, lmdi_names.GNP_per_P_LMDI, total_non_neg + ) + gnp_per_p_correction = gnp_per_p_percent.append(difference).multiply( + lmdi_names.GNP_per_P_LMDI, "difference", "correction", ignore_units=True + ) + gnp_per_p_corrected = gnp_per_p_correction.append(non_neg).add( + lmdi_names.GNP_per_P_LMDI, + "correction", + lmdi_names.GNP_per_P_LMDI, + ignore_units=True, + ) + lmdi_frames.append(gnp_per_p_corrected) + + fe_per_gnp_percent = _calc_percent_of_total_for_one_term( + non_neg, lmdi_names.FE_per_GNP_LMDI, total_non_neg + ) + fe_per_gnp_correction = fe_per_gnp_percent.append(difference).multiply( + lmdi_names.FE_per_GNP_LMDI, "difference", "correction", ignore_units=True + ) + fe_per_gnp_corrected = fe_per_gnp_correction.append(non_neg).add( + lmdi_names.FE_per_GNP_LMDI, + "correction", + lmdi_names.FE_per_GNP_LMDI, + ignore_units=True, + ) + lmdi_frames.append(fe_per_gnp_corrected) + + pedeq_per_fe_percent = _calc_percent_of_total_for_one_term( + non_neg, lmdi_names.PEdeq_per_FE_LMDI, total_non_neg + ) + pedeq_per_fe_correction = pedeq_per_fe_percent.append(difference).multiply( + lmdi_names.PEdeq_per_FE_LMDI, "difference", "correction", ignore_units=True + ) + pedeq_per_fe_corrected = pedeq_per_fe_correction.append(non_neg).add( + lmdi_names.PEdeq_per_FE_LMDI, + "correction", + lmdi_names.PEdeq_per_FE_LMDI, + ignore_units=True, + ) + lmdi_frames.append(pedeq_per_fe_corrected) + + peff_per_pedeq_percent = _calc_percent_of_total_for_one_term( + non_neg, lmdi_names.PEFF_per_PEDEq_LMDI, total_non_neg + ) + peff_per_pedeq_correction = peff_per_pedeq_percent.append(difference).multiply( + lmdi_names.PEFF_per_PEDEq_LMDI, "difference", "correction", ignore_units=True + ) + peff_per_pedeq_corrected = peff_per_pedeq_correction.append(non_neg).add( + lmdi_names.PEFF_per_PEDEq_LMDI, + "correction", + lmdi_names.PEFF_per_PEDEq_LMDI, + ignore_units=True, + ) + 
lmdi_frames.append(peff_per_pedeq_corrected) + + tfc_per_peff_percent = _calc_percent_of_total_for_one_term( + non_neg, lmdi_names.TFC_per_PEFF_LMDI, total_non_neg + ) + tfc_per_peff_correction = tfc_per_peff_percent.append(difference).multiply( + lmdi_names.TFC_per_PEFF_LMDI, "difference", "correction", ignore_units=True + ) + tfc_per_peff_corrected = tfc_per_peff_correction.append(non_neg).add( + lmdi_names.TFC_per_PEFF_LMDI, + "correction", + lmdi_names.TFC_per_PEFF_LMDI, + ignore_units=True, + ) + lmdi_frames.append(tfc_per_peff_corrected) + + full_lmdi = reduce(lambda x, y: x.append(y), lmdi_frames) + full_lmdi_no_scenario_class_column = pyam.IamDataFrame( + full_lmdi.as_pandas().drop(columns="scenario_class") + ) + return full_lmdi_no_scenario_class_column + + +def _lmdi_non_neg(uncorrected): + p_non_neg = _calc_one_non_negative_term(uncorrected, lmdi_names.Pop_LMDI) + gnp_per_p_non_neg = _calc_one_non_negative_term( + uncorrected, lmdi_names.GNP_per_P_LMDI + ) + fe_per_gnp_non_neg = _calc_one_non_negative_term( + uncorrected, lmdi_names.FE_per_GNP_LMDI + ) + pedeq_per_fe_non_neg = _calc_one_non_negative_term( + uncorrected, lmdi_names.PEdeq_per_FE_LMDI + ) + peff_per_pedeq_non_neg = _calc_one_non_negative_term( + uncorrected, lmdi_names.PEFF_per_PEDEq_LMDI + ) + tfc_per_peff_non_neg = _calc_one_non_negative_term( + uncorrected, lmdi_names.TFC_per_PEFF_LMDI + ) + + return ( + p_non_neg.append(gnp_per_p_non_neg) + .append(fe_per_gnp_non_neg) + .append(pedeq_per_fe_non_neg) + .append(peff_per_pedeq_non_neg) + .append(tfc_per_peff_non_neg) + ) + + +def _sum_lmdi_non_neg(lmdi_non_neg): + lmdi_non_neg.add( + lmdi_names.Pop_LMDI, + lmdi_names.GNP_per_P_LMDI, + "sum_to_GNP_per_P_LMDI", + append=True, + ignore_units=True, + ) + lmdi_non_neg.add( + "sum_to_GNP_per_P_LMDI", + lmdi_names.FE_per_GNP_LMDI, + "sum_to_FE_per_GNP_LMDI", + append=True, + ignore_units=True, + ) + lmdi_non_neg.add( + "sum_to_FE_per_GNP_LMDI", + lmdi_names.PEdeq_per_FE_LMDI, + 
"sum_to_PEdeq_per_FE_LMDI", + append=True, + ignore_units=True, + ) + lmdi_non_neg.add( + "sum_to_PEdeq_per_FE_LMDI", + lmdi_names.PEFF_per_PEDEq_LMDI, + "sum_to_PEFF_per_PEDEq_LMDI", + append=True, + ignore_units=True, + ) + return lmdi_non_neg.add( + "sum_to_PEFF_per_PEDEq_LMDI", + lmdi_names.TFC_per_PEFF_LMDI, + "total_no_neg", + append=False, + ignore_units=True, + ) + + +def _calc_percent_of_total_for_one_term(non_neg, lmdi_term_name, tfc_diff): + return non_neg.append(tfc_diff).divide( + lmdi_term_name, "total_no_neg", lmdi_term_name, ignore_units=True + ) + + +def _tfc_diff(kaya_factors): + + (combined_model_name, combined_scenario_name, combined_region_name) = ( + _make_combined_scenario_name(kaya_factors.as_pandas()) + ) + tfc = ( + kaya_factors.filter( + variable=kaya_variable_names.TFC, scenario_class="reference" + ) + .rename(variable={kaya_variable_names.TFC: "tfc_ref"}) + .append( + kaya_factors.filter( + variable=kaya_variable_names.TFC, scenario_class="intervention" + ) + ) + ) + tfc = pyam.IamDataFrame( + tfc.as_pandas() + .assign(scenario_class="LMDI") + .assign( + model=combined_model_name, + scenario=combined_scenario_name, + region=combined_region_name, + ) + ) + return tfc.subtract( + "tfc_ref", kaya_variable_names.TFC, "tfc_diff", ignore_units=True + ) + + +def _calc_one_non_negative_term(uncorrected_lmdi, lmdi_term_name): + return uncorrected_lmdi.apply( + _remove_negative, lmdi_term_name, args=[lmdi_term_name], ignore_units=True + ) + + +def _remove_negative(lmdi_term): + return lmdi_term.clip(lower=0) + + +def _uncorrected_lmdi(kaya_factors): + + p = _calc_one_lmdi_term( + kaya_factors, input_variable_names.POPULATION, lmdi_names.Pop_LMDI + ) + gnp_per_p = _calc_one_lmdi_term( + kaya_factors, kaya_factor_names.GNP_per_P, lmdi_names.GNP_per_P_LMDI + ) + fe_per_gnp = _calc_one_lmdi_term( + kaya_factors, kaya_factor_names.FE_per_GNP, lmdi_names.FE_per_GNP_LMDI + ) + pe_deq_per_fe = _calc_one_lmdi_term( + kaya_factors, 
kaya_factor_names.PEdeq_per_FE, lmdi_names.PEdeq_per_FE_LMDI + ) + peff_per_pe_deq = _calc_one_lmdi_term( + kaya_factors, kaya_factor_names.PEFF_per_PEDEq, lmdi_names.PEFF_per_PEDEq_LMDI + ) + tfc_per_peff = _calc_one_lmdi_term( + kaya_factors, kaya_factor_names.TFC_per_PEFF, lmdi_names.TFC_per_PEFF_LMDI + ) + return ( + p.append(gnp_per_p) + .append(fe_per_gnp) + .append(pe_deq_per_fe) + .append(peff_per_pe_deq) + .append(tfc_per_peff) + ) + + +def _calc_one_lmdi_term( + input_data, + kaya_factor_name, + lmdi_term_name, + kaya_product_name=kaya_variable_names.TFC, +): + return input_data.apply( + _lmdi, + lmdi_term_name, + axis="variable", + args=[kaya_factor_name, kaya_product_name], + ignore_units=True, + ) + + +def _lmdi(kaya_factor, kaya_product): + + (combined_model_name, combined_scenario_name, combined_region_name) = ( + _make_combined_scenario_name(kaya_factor) + ) + + factor_ref = ( + kaya_factor.reset_index() + .query('scenario_class == "reference"') + .assign( + model=combined_model_name, + scenario=combined_scenario_name, + region=combined_region_name, + ) + .assign(scenario_class="LMDI") + .set_index(list(kaya_factor.reset_index().columns[:-1])) + .rename(columns=lambda x: "value") + ) + + factor_int = ( + kaya_factor.reset_index() + .query('scenario_class == "intervention"') + .assign( + model=combined_model_name, + scenario=combined_scenario_name, + region=combined_region_name, + ) + .assign(scenario_class="LMDI") + .set_index(list(kaya_factor.reset_index().columns[:-1])) + .rename(columns=lambda x: "value") + ) + tfc_ref = ( + kaya_product.reset_index() + .query('scenario_class == "reference"') + .assign( + model=combined_model_name, + scenario=combined_scenario_name, + region=combined_region_name, + ) + .assign(scenario_class="LMDI") + .set_index(list(kaya_factor.reset_index().columns[:-1])) + .rename(columns=lambda x: "value") + ) + tfc_int = ( + kaya_product.reset_index() + .query('scenario_class == "intervention"') + .assign( + 
model=combined_model_name, + scenario=combined_scenario_name, + region=combined_region_name, + ) + .assign(scenario_class="LMDI") + .set_index(list(kaya_factor.reset_index().columns[:-1])) + .rename(columns=lambda x: "value") + ) + return ( + ((tfc_ref - tfc_int) / (np.log(tfc_ref) - np.log(tfc_int))) + * (np.log(factor_ref / factor_int)) + ).squeeze(axis=1) + + +def _make_combined_scenario_name(kaya_factor): + ref = kaya_factor.reset_index().query('scenario_class == "reference"') + int = kaya_factor.reset_index().query('scenario_class == "intervention"') + + ref_model_name = ref.model.values[0] + int_model_name = int.model.values[0] + + ref_scenario_name = ref.scenario.values[0] + int_scenario_name = int.scenario.values[0] + + ref_region_name = ref.region.values[0] + int_region_name = int.region.values[0] + + return ( + ref_model_name + "::" + int_model_name, + ref_scenario_name + "::" + int_scenario_name, + ref_region_name + "::" + int_region_name, + ) diff --git a/pyam/kaya/lmdi_names.py b/pyam/kaya/lmdi_names.py new file mode 100644 index 000000000..4f47a5c7c --- /dev/null +++ b/pyam/kaya/lmdi_names.py @@ -0,0 +1,7 @@ +Pop_LMDI = "Population (LMDI)" +GNP_per_P_LMDI = "GNP/P (LMDI)" +FE_per_GNP_LMDI = "FE/GNP (LMDI)" +PEdeq_per_FE_LMDI = "PEDEq/FE (LMDI)" +PEFF_per_PEDEq_LMDI = "PEFF/PEDEq (LMDI)" +TFC_per_PEFF_LMDI = "TFC/PEFF (LMDI)" +NFC_per_TFC_LMDI = "NFC/TFC (LMDI)" diff --git a/tests/test_feature_kaya_factors.py b/tests/test_feature_kaya_factors.py new file mode 100644 index 000000000..c3b7672a6 --- /dev/null +++ b/tests/test_feature_kaya_factors.py @@ -0,0 +1,124 @@ +import pandas as pd +import pytest + +from pyam import IamDataFrame +from pyam.kaya import input_variable_names, kaya_factor_names, kaya_variable_names +from pyam.testing import assert_iamframe_equal + +TEST_DF = IamDataFrame( + pd.DataFrame( + [ + [input_variable_names.POPULATION, "million", 1000], + [input_variable_names.GDP_PPP, "billion USD_2005/yr", 6], + [input_variable_names.GDP_MER, 
"billion USD_2005/yr", 5], + [input_variable_names.FINAL_ENERGY, "EJ/yr", 8], + [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 10], + [input_variable_names.PRIMARY_ENERGY_COAL, "EJ/yr", 5], + [input_variable_names.PRIMARY_ENERGY_GAS, "EJ/yr", 2], + [input_variable_names.PRIMARY_ENERGY_OIL, "EJ/yr", 2], + [ + input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, + "Mt CO2/yr", + 10, + ], + [input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, "Mt CO2/yr", 1], + [input_variable_names.EMISSIONS_CO2_AFOLU, "Mt CO2/yr", 1], + [input_variable_names.EMISSIONS_CO2_CCS, "Mt CO2/yr", 4], + [input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, "Mt CO2/yr", 1], + [input_variable_names.CCS_FOSSIL_ENERGY, "Mt CO2/yr", 2], + [input_variable_names.CCS_FOSSIL_INDUSTRY, "Mt CO2/yr", 1], + [input_variable_names.CCS_BIOMASS_ENERGY, "Mt CO2/yr", 0.5], + [input_variable_names.CCS_BIOMASS_INDUSTRY, "Mt CO2/yr", 0.5], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + +EXP_DF = IamDataFrame( + pd.DataFrame( + [ + [kaya_factor_names.FE_per_GNP, "EJ / USD / billion", 1.33333], + [kaya_factor_names.GNP_per_P, "USD * billion / million / a", 0.006000], + [kaya_factor_names.NFC_per_TFC, "", 0.833333], + [kaya_factor_names.PEdeq_per_FE, "", 1.250000], + [kaya_factor_names.PEFF_per_PEDEq, "", 0.900000], + [kaya_factor_names.TFC_per_PEFF, "Mt CO2/EJ", 1.333333], + [input_variable_names.POPULATION, "million", 1000], + [kaya_variable_names.TFC, "Mt CO2/yr", 12.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + +# can't append EXP_DF to TEST_DF because of overlapping values +# append this dataframe to create full results for comparison +EXP_DF_FOR_APPEND = IamDataFrame( + pd.DataFrame( + [ + [kaya_factor_names.FE_per_GNP, "EJ / USD / billion", 1.33333], + [kaya_factor_names.GNP_per_P, "USD * billion / million / a", 0.006000], + [kaya_factor_names.NFC_per_TFC, "", 
0.833333], + [kaya_factor_names.PEdeq_per_FE, "", 1.250000], + [kaya_factor_names.PEFF_per_PEDEq, "", 0.900000], + [kaya_factor_names.TFC_per_PEFF, "Mt CO2/EJ", 1.333333], + [kaya_variable_names.TFC, "Mt CO2/yr", 12.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + + +@pytest.mark.parametrize("append", (False, True)) +def test_kaya_factors(append): + """Test computing kaya factors""" + + if append: + obs = TEST_DF.copy() + obs.compute.kaya_factors( + scenarios=[("model_a", "scen_a", "World")], append=True + ) + assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) + else: + obs = TEST_DF.compute.kaya_factors(scenarios=[("model_a", "scen_a", "World")]) + assert_iamframe_equal(EXP_DF, obs) + + +@pytest.mark.parametrize("append", (False, True)) +def test_kaya_variables_none_when_input_variables_missing(append): + """Assert that computing kaya variables with + missing input variables returns None + """ + + if append: + obs = TEST_DF.copy() + # select subset of required input variables + ( + obs.filter(variable=input_variable_names.POPULATION).compute.kaya_factors( + scenarios=[("model_a", "scen_a", "World")], append=True + ) + ) + # assert that no data was added + assert_iamframe_equal(TEST_DF, obs) + else: + obs = TEST_DF.filter( + variable=input_variable_names.POPULATION + ).compute.kaya_factors(scenarios=[("model_a", "scen_a", "World")]) + assert obs is None + + +def test_calling_kaya_factors_multiple_times(): + """Test calling the method a second time has no effect""" + + obs = TEST_DF.copy() + obs.compute.kaya_factors(scenarios=[("model_a", "scen_a", "World")], append=True) + obs.compute.kaya_factors(scenarios=[("model_a", "scen_a", "World")], append=True) + assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) diff --git a/tests/test_feature_kaya_lmdi.py b/tests/test_feature_kaya_lmdi.py new file mode 100644 index 000000000..87967f909 --- /dev/null +++ 
b/tests/test_feature_kaya_lmdi.py @@ -0,0 +1,157 @@ +import pandas as pd +import pytest + +from pyam import IamDataFrame +from pyam.kaya import input_variable_names, lmdi_names +from pyam.testing import assert_iamframe_equal + +TEST_DF = IamDataFrame( + pd.DataFrame( + [ + [input_variable_names.POPULATION, "million", 1000], + [input_variable_names.GDP_PPP, "billion USD_2005/yr", 6], + [input_variable_names.GDP_MER, "billion USD_2005/yr", 5], + [input_variable_names.FINAL_ENERGY, "EJ/yr", 8], + [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 10], + [input_variable_names.PRIMARY_ENERGY_COAL, "EJ/yr", 5], + [input_variable_names.PRIMARY_ENERGY_GAS, "EJ/yr", 2], + [input_variable_names.PRIMARY_ENERGY_OIL, "EJ/yr", 2], + [ + input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, + "Mt CO2/yr", + 10, + ], + [input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, "Mt CO2/yr", 1], + [input_variable_names.EMISSIONS_CO2_AFOLU, "Mt CO2/yr", 1], + [input_variable_names.EMISSIONS_CO2_CCS, "Mt CO2/yr", 4], + [input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, "Mt CO2/yr", 1], + [input_variable_names.CCS_FOSSIL_ENERGY, "Mt CO2/yr", 2], + [input_variable_names.CCS_FOSSIL_INDUSTRY, "Mt CO2/yr", 1], + [input_variable_names.CCS_BIOMASS_ENERGY, "Mt CO2/yr", 0.5], + [input_variable_names.CCS_BIOMASS_INDUSTRY, "Mt CO2/yr", 0.5], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +).append( + IamDataFrame( + pd.DataFrame( + [ + [input_variable_names.POPULATION, "million", 1001], + [input_variable_names.GDP_PPP, "billion USD_2005/yr", 7], + [input_variable_names.GDP_MER, "billion USD_2005/yr", 6], + [input_variable_names.FINAL_ENERGY, "EJ/yr", 9], + [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 11], + [input_variable_names.PRIMARY_ENERGY_COAL, "EJ/yr", 6], + [input_variable_names.PRIMARY_ENERGY_GAS, "EJ/yr", 3], + [input_variable_names.PRIMARY_ENERGY_OIL, "EJ/yr", 3], + [ + 
input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, + "Mt CO2/yr", + 13, + ], + [ + input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, + "Mt CO2/yr", + 2, + ], + [input_variable_names.EMISSIONS_CO2_AFOLU, "Mt CO2/yr", 2], + [input_variable_names.EMISSIONS_CO2_CCS, "Mt CO2/yr", 5], + [input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, "Mt CO2/yr", 2], + [input_variable_names.CCS_FOSSIL_ENERGY, "Mt CO2/yr", 3], + [input_variable_names.CCS_FOSSIL_INDUSTRY, "Mt CO2/yr", 2], + [input_variable_names.CCS_BIOMASS_ENERGY, "Mt CO2/yr", 1.5], + [input_variable_names.CCS_BIOMASS_INDUSTRY, "Mt CO2/yr", 1.5], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_b", + region="World", + ) +) + + +EXP_DF = IamDataFrame( + pd.DataFrame( + [ + [lmdi_names.FE_per_GNP_LMDI, "unknown", 1.321788], + [lmdi_names.GNP_per_P_LMDI, "unknown", 0], + [lmdi_names.PEdeq_per_FE_LMDI, "unknown", 0.816780], + [lmdi_names.PEFF_per_PEDEq_LMDI, "unknown", 0], + [lmdi_names.Pop_LMDI, "unknown", 0], + [lmdi_names.TFC_per_PEFF_LMDI, "unknown", 4.853221], + ], + columns=["variable", "unit", 2010], + ), + model="model_a::model_a", + scenario="scen_a::scen_b", + region="World::World", +) + + +@pytest.mark.parametrize("append", (False, True)) +def test_kaya_lmdi(append): + """Test computing kaya LMDI""" + + if append: + obs = TEST_DF.copy() + obs.compute.kaya_lmdi( + ref_scenario=("model_a", "scen_a", "World"), + int_scenario=("model_a", "scen_b", "World"), + append=True, + ) + assert_iamframe_equal(TEST_DF.append(EXP_DF), obs) + else: + obs = TEST_DF.compute.kaya_lmdi( + ref_scenario=("model_a", "scen_a", "World"), + int_scenario=("model_a", "scen_b", "World"), + ) + assert_iamframe_equal(EXP_DF, obs) + + +@pytest.mark.parametrize("append", (False, True)) +def test_kaya_lmdi_none_when_input_variables_missing(append): + """Assert that computing kaya LMDI with + missing input variables returns None + """ + + if append: + obs = TEST_DF.copy() + # select subset of 
required input variables + ( + obs.filter(variable=input_variable_names.POPULATION).compute.kaya_lmdi( + ref_scenario=("model_a", "scen_a", "World"), + int_scenario=("model_a", "scen_b", "World"), + append=True, + ) + ) + # assert that no data was added + assert_iamframe_equal(TEST_DF, obs) + else: + obs = TEST_DF.filter( + variable=input_variable_names.POPULATION + ).compute.kaya_lmdi( + ref_scenario=("model_a", "scen_a", "World"), + int_scenario=("model_a", "scen_b", "World"), + ) + assert obs is None + + +def test_calling_kaya_lmdi_multiple_times(): + """Test calling the method a second time has no effect""" + + obs = TEST_DF.copy() + obs.compute.kaya_lmdi( + ref_scenario=("model_a", "scen_a", "World"), + int_scenario=("model_a", "scen_b", "World"), + append=True, + ) + obs.compute.kaya_lmdi( + ref_scenario=("model_a", "scen_a", "World"), + int_scenario=("model_a", "scen_b", "World"), + append=True, + ) + assert_iamframe_equal(TEST_DF.append(EXP_DF), obs) diff --git a/tests/test_feature_kaya_variables.py b/tests/test_feature_kaya_variables.py new file mode 100644 index 000000000..0f6ebbb8c --- /dev/null +++ b/tests/test_feature_kaya_variables.py @@ -0,0 +1,119 @@ +import pandas as pd +import pytest + +from pyam import IamDataFrame +from pyam.kaya import input_variable_names, kaya_variable_names +from pyam.testing import assert_iamframe_equal + +TEST_DF = IamDataFrame( + pd.DataFrame( + [ + [input_variable_names.POPULATION, "million", 1000], + [input_variable_names.GDP_PPP, "billion USD_2005/yr", 6], + [input_variable_names.GDP_MER, "billion USD_2005/yr", 5], + [input_variable_names.FINAL_ENERGY, "EJ/yr", 8], + [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 10], + [input_variable_names.PRIMARY_ENERGY_COAL, "EJ/yr", 5], + [input_variable_names.PRIMARY_ENERGY_GAS, "EJ/yr", 2], + [input_variable_names.PRIMARY_ENERGY_OIL, "EJ/yr", 2], + [ + input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, + "Mt CO2/yr", + 10, + ], + 
[input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, "Mt CO2/yr", 1], + [input_variable_names.EMISSIONS_CO2_AFOLU, "Mt CO2/yr", 1], + [input_variable_names.EMISSIONS_CO2_CCS, "Mt CO2/yr", 4], + [input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, "Mt CO2/yr", 1], + [input_variable_names.CCS_FOSSIL_ENERGY, "Mt CO2/yr", 2], + [input_variable_names.CCS_FOSSIL_INDUSTRY, "Mt CO2/yr", 1], + [input_variable_names.CCS_BIOMASS_ENERGY, "Mt CO2/yr", 0.5], + [input_variable_names.CCS_BIOMASS_INDUSTRY, "Mt CO2/yr", 0.5], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + +EXP_DF = IamDataFrame( + pd.DataFrame( + [ + [input_variable_names.POPULATION, "million", 1000], + [input_variable_names.GDP_PPP, "billion USD_2005/yr", 6], + [input_variable_names.FINAL_ENERGY, "EJ/yr", 8.0], + [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 10.0], + [kaya_variable_names.PRIMARY_ENERGY_FF, "EJ/yr", 9.0], + [kaya_variable_names.TFC, "Mt CO2/yr", 12.0], + [kaya_variable_names.NFC, "Mt CO2/yr", 10.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + +# can't append EXP_DF to TEST_DF because of overlapping values +# append this dataframe to create full results for comparison +EXP_DF_FOR_APPEND = IamDataFrame( + pd.DataFrame( + [ + [kaya_variable_names.PRIMARY_ENERGY_FF, "EJ/yr", 9.0], + [kaya_variable_names.TFC, "Mt CO2/yr", 12.0], + [kaya_variable_names.NFC, "Mt CO2/yr", 10.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + + +@pytest.mark.parametrize("append", (False, True)) +def test_kaya_variables(append): + """Test computing kaya variables""" + + if append: + obs = TEST_DF.copy() + obs.compute.kaya_variables( + scenarios=[("model_a", "scen_a", "World")], append=True + ) + assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) + else: + obs = TEST_DF.compute.kaya_variables(scenarios=[("model_a", 
"scen_a", "World")]) + assert_iamframe_equal(EXP_DF, obs) + + +@pytest.mark.parametrize("append", (False, True)) +def test_kaya_variables_none_when_input_variables_missing(append): + """Assert that computing kaya variables with + missing input variables returns None + """ + + if append: + obs = TEST_DF.copy() + # select subset of required input variables + ( + obs.filter(variable=input_variable_names.POPULATION).compute.kaya_variables( + scenarios=[("model_a", "scen_a", "World")], append=True + ) + ) + # assert that no data was added + assert_iamframe_equal(TEST_DF, obs) + else: + obs = TEST_DF.filter( + variable=input_variable_names.POPULATION + ).compute.kaya_variables(scenarios=[("model_a", "scen_a", "World")]) + assert obs is None + + +def test_calling_kaya_variables_multiple_times(): + """Test calling the method a second time has no effect""" + + obs = TEST_DF.copy() + obs.compute.kaya_variables(scenarios=[("model_a", "scen_a", "World")], append=True) + obs.compute.kaya_variables(scenarios=[("model_a", "scen_a", "World")], append=True) + assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) From 734d3c03a0b093cbf7f54306cec9131750f18d39 Mon Sep 17 00:00:00 2001 From: Zachary Schmidt Date: Mon, 2 Dec 2024 14:51:55 -0800 Subject: [PATCH 2/9] remove scenarios arg from kaya_variables method of compute module. kaya_variables now creates variables for all scenarios/model/region combination in a dataframe if possible. 
Also add special cases for GDP (both PPP and MER are accepted) --- pyam/compute.py | 24 +++--- pyam/kaya/kaya_variables.py | 109 +++++++++++++++++++-------- tests/test_feature_kaya_variables.py | 32 ++++++-- 3 files changed, 110 insertions(+), 55 deletions(-) diff --git a/pyam/compute.py b/pyam/compute.py index aaf979277..00650befb 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -251,14 +251,12 @@ def bias(self, name, method, axis): """ _compute_bias(self._df, name, method, axis) - def kaya_variables(self, scenarios, append=False): - """Compute the variables needed to compute Kaya factors + def kaya_variables(self, append=False): + """Create the set of variables needed to compute Kaya factors for the Kaya Decomposition Analysis. Parameters ---------- - scenarios : iterable of tuples (model, scenario, region) - The (model, scenario, region) combinations to be included. append : bool, optional Whether to append computed timeseries data to this instance. @@ -274,9 +272,7 @@ def kaya_variables(self, scenarios, append=False): .. 
code-block:: python - df.compute.kaya_variables(scenarios=[("model_a", "scenario_a", "region_a"), - ("model_b", "scenario_b", "region_b")], - append=True) + df.compute.kaya_variables(append=True) The IamDataFrame must contain the following variables, otherwise the method will return None: @@ -300,22 +296,22 @@ def kaya_variables(self, scenarios, append=False): - Carbon Sequestration|CCS|Biomass|Industrial Processes """ - valid_scenarios = _validate_kaya_scenario_args(scenarios=scenarios) - if valid_scenarios is None: - return None - kaya_variables_frame = kaya_variables.kaya_variables(self._df, valid_scenarios) + + kaya_variables_frame = kaya_variables.kaya_variables( + self._df) if kaya_variables_frame is None: return None if append: self._df.append( _find_non_duplicate_rows(self._df, kaya_variables_frame), inplace=True ) + return None return kaya_variables_frame def kaya_factors(self, scenarios, append=False): - """Compute the Kaya factors needed to compute factors - for the Kaya Decomposition Analysis. + """Compute the Kaya factors needed for the + Kaya Decomposition Analysis. 
Parameters ---------- @@ -365,7 +361,7 @@ def kaya_factors(self, scenarios, append=False): valid_scenarios = _validate_kaya_scenario_args(scenarios=scenarios) if valid_scenarios is None: return None - kaya_variables = self.kaya_variables(valid_scenarios, append=False) + kaya_variables = self.kaya_variables(append=False) if kaya_variables is None: return None kaya_factors_frame = kaya_factors.kaya_factors(kaya_variables, valid_scenarios) diff --git a/pyam/kaya/kaya_variables.py b/pyam/kaya/kaya_variables.py index 2ee4fa410..d1373316f 100644 --- a/pyam/kaya/kaya_variables.py +++ b/pyam/kaya/kaya_variables.py @@ -1,55 +1,98 @@ import logging -from functools import reduce +import warnings +import pyam from pyam.kaya import input_variable_names, kaya_variable_names logger = logging.getLogger(__name__) -input_variable_list = [ +required_input_variables = [ vars(input_variable_names)[variable_name] for variable_name in dir(input_variable_names) if not variable_name.startswith("__") ] -def kaya_variables(input_data, scenarios): +def kaya_variables(input_data): + if _is_input_data_incomplete(input_data): + return None + + kaya_variables = pyam.concat( + [ + _calc_pop(input_data), + _calc_gdp(input_data), + _calc_fe(input_data), + _calc_pe(input_data), + _calc_pe_ff(input_data), + _calc_tfc(input_data), + _calc_nfc(input_data), + ] + ) + return kaya_variables + + +def _is_input_data_incomplete(input_data): # copy data so we don't create side effects # in particular, require_data will change the "exclude" series input_data = input_data.copy() - validated_input_data = _validate_input_data(input_data) - if validated_input_data.empty: - return None - kaya_variable_frames = [] - for scenario in scenarios: - input = validated_input_data.filter( - model=scenario[0], scenario=scenario[1], region=scenario[2] + # Get all unique model/scenario/region combinations + scenario_model_region = input_data.data[ + ["model", "scenario", "region"] + ].drop_duplicates() + + # Check each 
combination + for _, row in scenario_model_region.iterrows(): + single_combination = input_data.filter( + model=row["model"], scenario=row["scenario"], region=row["region"] ) - if input.empty: - break - kaya_variable_frames.append(_calc_pop(input)) - kaya_variable_frames.append(_calc_gdp(input)) - kaya_variable_frames.append(_calc_fe(input)) - kaya_variable_frames.append(_calc_pe(input)) - kaya_variable_frames.append(_calc_pe_ff(input)) - kaya_variable_frames.append(_calc_tfc(input)) - kaya_variable_frames.append(_calc_nfc(input)) - - if len(kaya_variable_frames) == 0: - return None - # append all the IamDataFrames into one - return reduce(lambda x, y: x.append(y), kaya_variable_frames) - -def _validate_input_data(input_data): - missing_variables = input_data.require_data( - variable=input_variable_list, exclude_on_fail=True + # Get variables present for this combination + single_combination_variables = set(single_combination.data["variable"].unique()) + # special case for GDP: either form is acceptable, so don't check for either + # as long as one is present + required_variables_set = make_required_variables_set( + single_combination_variables + ) + # Check if any required variables are missing + missing_variables = set(required_variables_set) - single_combination_variables + + if missing_variables is not None: + logger.info( + f"Variables missing for model: {row['model']}, scenario: {row['scenario']}, region: {row['region']}:" + f"\n{missing_variables}" + ) + + # special case for GDP: either form is acceptable, so don't check for either + # as long as one is present + required_variables_set = make_required_variables_set( + set(input_data.data["variable"].unique()) ) - if missing_variables is not None: - logger.info( - f"These variables are missing from the \ - scenarios in input_data:\n{missing_variables}" + # exclude model/scenario combinations that have missing variables, disregarding region + # even if all variables are not present for a region, arithmetic 
operations + # will return an empty dataframe, not throw an error, so it is safe to proceed + input_data.require_data(variable=required_input_variables, exclude_on_fail=True) + # supress warning about empty dataframe if filtering excludes all scenarios + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return input_data.filter(exclude=False).empty + + +def make_required_variables_set(input_variables): + required_variables_set = set(required_input_variables) + if not _missing_gdp(input_variables): + # either form of GDP is acceptable, so don't check for both + # as long as one is present + return required_variables_set - set( + [input_variable_names.GDP_PPP, input_variable_names.GDP_MER] ) - return input_data.filter(exclude=False) + return required_variables_set + + +def _missing_gdp(input_variables): + return ( + input_variable_names.GDP_PPP in input_variables + or input_variable_names.GDP_MER in input_variables + ) def _calc_pop(input_data): diff --git a/tests/test_feature_kaya_variables.py b/tests/test_feature_kaya_variables.py index 0f6ebbb8c..a35fe781c 100644 --- a/tests/test_feature_kaya_variables.py +++ b/tests/test_feature_kaya_variables.py @@ -1,5 +1,6 @@ import pandas as pd import pytest +import logging from pyam import IamDataFrame from pyam.kaya import input_variable_names, kaya_variable_names @@ -78,12 +79,10 @@ def test_kaya_variables(append): if append: obs = TEST_DF.copy() - obs.compute.kaya_variables( - scenarios=[("model_a", "scen_a", "World")], append=True - ) + obs.compute.kaya_variables(append=True) assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) else: - obs = TEST_DF.compute.kaya_variables(scenarios=[("model_a", "scen_a", "World")]) + obs = TEST_DF.compute.kaya_variables() assert_iamframe_equal(EXP_DF, obs) @@ -98,7 +97,7 @@ def test_kaya_variables_none_when_input_variables_missing(append): # select subset of required input variables ( obs.filter(variable=input_variable_names.POPULATION).compute.kaya_variables( 
- scenarios=[("model_a", "scen_a", "World")], append=True + append=True ) ) # assert that no data was added @@ -106,7 +105,7 @@ def test_kaya_variables_none_when_input_variables_missing(append): else: obs = TEST_DF.filter( variable=input_variable_names.POPULATION - ).compute.kaya_variables(scenarios=[("model_a", "scen_a", "World")]) + ).compute.kaya_variables() assert obs is None @@ -114,6 +113,23 @@ def test_calling_kaya_variables_multiple_times(): """Test calling the method a second time has no effect""" obs = TEST_DF.copy() - obs.compute.kaya_variables(scenarios=[("model_a", "scen_a", "World")], append=True) - obs.compute.kaya_variables(scenarios=[("model_a", "scen_a", "World")], append=True) + obs.compute.kaya_variables(append=True) + obs.compute.kaya_variables(append=True) assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) + + +def test_kaya_variables_logs_missing_variables(caplog): + """Test that missing variables are correctly logged""" + # Create test data with only population + df_no_pop = TEST_DF.filter(variable=input_variable_names.POPULATION, keep=False) + + with caplog.at_level(logging.INFO): + df_no_pop.compute.kaya_variables() + + # Check that the log message contains expected information + assert ( + "Variables missing for model: model_a, scenario: scen_a, region: World" + in caplog.text + ) + + assert input_variable_names.POPULATION in caplog.text From 612e8f6c902df7a107497b1131bd4d7201777bc7 Mon Sep 17 00:00:00 2001 From: Zachary Schmidt Date: Wed, 4 Dec 2024 00:28:14 -0800 Subject: [PATCH 3/9] fix bugs in kaya_variables for gdp special cases and add tests for those cases --- pyam/kaya/kaya_variables.py | 10 ++++----- tests/test_feature_kaya_variables.py | 33 ++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/pyam/kaya/kaya_variables.py b/pyam/kaya/kaya_variables.py index d1373316f..9fd753e49 100644 --- a/pyam/kaya/kaya_variables.py +++ b/pyam/kaya/kaya_variables.py @@ -15,6 +15,7 @@ def 
kaya_variables(input_data): if _is_input_data_incomplete(input_data): + print("returning NONE") return None kaya_variables = pyam.concat( @@ -55,8 +56,7 @@ def _is_input_data_incomplete(input_data): ) # Check if any required variables are missing missing_variables = set(required_variables_set) - single_combination_variables - - if missing_variables is not None: + if missing_variables: logger.info( f"Variables missing for model: {row['model']}, scenario: {row['scenario']}, region: {row['region']}:" f"\n{missing_variables}" @@ -70,7 +70,7 @@ def _is_input_data_incomplete(input_data): # exclude model/scenario combinations that have missing variables, disregarding region # even if all variables are not present for a region, arithmetic operations # will return an empty dataframe, not throw an error, so it is safe to proceed - input_data.require_data(variable=required_input_variables, exclude_on_fail=True) + input_data.require_data(variable=list(required_variables_set), exclude_on_fail=True) # supress warning about empty dataframe if filtering excludes all scenarios with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -79,7 +79,7 @@ def _is_input_data_incomplete(input_data): def make_required_variables_set(input_variables): required_variables_set = set(required_input_variables) - if not _missing_gdp(input_variables): + if _has_at_least_one_gdp(input_variables): # either form of GDP is acceptable, so don't check for both # as long as one is present return required_variables_set - set( @@ -88,7 +88,7 @@ def make_required_variables_set(input_variables): return required_variables_set -def _missing_gdp(input_variables): +def _has_at_least_one_gdp(input_variables): return ( input_variable_names.GDP_PPP in input_variables or input_variable_names.GDP_MER in input_variables diff --git a/tests/test_feature_kaya_variables.py b/tests/test_feature_kaya_variables.py index a35fe781c..e05ffd8b2 100644 --- a/tests/test_feature_kaya_variables.py +++ 
b/tests/test_feature_kaya_variables.py @@ -133,3 +133,36 @@ def test_kaya_variables_logs_missing_variables(caplog): ) assert input_variable_names.POPULATION in caplog.text + + +def test_kaya_variables_uses_gdp_mer_fallback(): + """Test that kaya_variables uses GDP_MER when GDP_PPP is not available""" + # Create test data without GDP_PPP + df_no_gdp_ppp = TEST_DF.filter(variable=input_variable_names.GDP_PPP, keep=False) + + # Create expected result without GDP_MER instead of GDP_PPP + exp_no_gdp_ppp = EXP_DF.filter( + variable=input_variable_names.GDP_PPP, keep=False + ).append(TEST_DF.filter(variable=input_variable_names.GDP_MER)) + + # Compute kaya variables + obs = df_no_gdp_ppp.compute.kaya_variables() + + # Verify results match expected + assert_iamframe_equal(exp_no_gdp_ppp, obs) + + +def test_kaya_variables_returns_none_when_no_gdp_available(): + """Test that kaya_variables returns None both + GDP_MER and GDP_PPP are unavailable""" + # Create test data without GDP_PPP + df_no_gdp = TEST_DF.filter( + variable=[input_variable_names.GDP_PPP, input_variable_names.GDP_MER], + keep=False, + ) + + # Compute kaya variables + obs = df_no_gdp.compute.kaya_variables() + + # Verify results match expected + assert obs is None From e7a779703dc956ab33ad8cf85f3f62e9f4ec2656 Mon Sep 17 00:00:00 2001 From: Zachary Schmidt Date: Wed, 4 Dec 2024 12:16:05 -0800 Subject: [PATCH 4/9] remove scenarios argument from kaya_factors --- pyam/compute.py | 24 +++++++------------ pyam/kaya/kaya_factors.py | 38 ++++++++++++------------------ pyam/kaya/kaya_variables.py | 1 - tests/test_feature_kaya_factors.py | 14 +++++------ 4 files changed, 30 insertions(+), 47 deletions(-) diff --git a/pyam/compute.py b/pyam/compute.py index 00650befb..b3d54f38c 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -297,8 +297,7 @@ def kaya_variables(self, append=False): """ - kaya_variables_frame = kaya_variables.kaya_variables( - self._df) + kaya_variables_frame = kaya_variables.kaya_variables(self._df) 
if kaya_variables_frame is None: return None if append: @@ -309,14 +308,12 @@ def kaya_variables(self, append=False): return kaya_variables_frame - def kaya_factors(self, scenarios, append=False): + def kaya_factors(self, append=False): """Compute the Kaya factors needed for the Kaya Decomposition Analysis. Parameters ---------- - scenarios : iterable of tuples (model, scenario, region) - The (model, scenario, region) combinations to be included. append : bool, optional Whether to append computed timeseries data to this instance. @@ -332,9 +329,7 @@ def kaya_factors(self, scenarios, append=False): .. code-block:: python - df.compute.kaya_factors(scenarios=[("model_a", "scenario_a", "region_a"), - ("model_b", "scenario_b", "region_b")], - append=True) + df.compute.kaya_factors(append=True) The IamDataFrame must contain the following variables, otherwise the method will return None: @@ -358,13 +353,10 @@ def kaya_factors(self, scenarios, append=False): - Carbon Sequestration|CCS|Biomass|Industrial Processes """ - valid_scenarios = _validate_kaya_scenario_args(scenarios=scenarios) - if valid_scenarios is None: - return None kaya_variables = self.kaya_variables(append=False) if kaya_variables is None: return None - kaya_factors_frame = kaya_factors.kaya_factors(kaya_variables, valid_scenarios) + kaya_factors_frame = kaya_factors.kaya_factors(kaya_variables) if kaya_factors_frame is None: return None if append: @@ -400,9 +392,11 @@ def kaya_lmdi(self, ref_scenario, int_scenario, append=False): .. 
code-block:: python - df.compute.kaya_lmdi(ref_scenario=("model_a", "scenario_a", "region_a"), - int_scenario=("model_b", "scenario_b", "region_b"), - append=True) + df.compute.kaya_lmdi( + ref_scenario=("model_a", "scenario_a", "region_a"), + int_scenario=("model_b", "scenario_b", "region_b"), + append=True, + ) The IamDataFrame must contain the following variables, otherwise the method will return None: diff --git a/pyam/kaya/kaya_factors.py b/pyam/kaya/kaya_factors.py index bc0b2bb27..abc626522 100644 --- a/pyam/kaya/kaya_factors.py +++ b/pyam/kaya/kaya_factors.py @@ -1,30 +1,22 @@ -from functools import reduce - +import pyam from pyam.kaya import input_variable_names, kaya_factor_names, kaya_variable_names -def kaya_factors(kaya_variables_frame, scenarios): - kaya_factors_frames = [] - for scenario in scenarios: - input = kaya_variables_frame.filter( - model=scenario[0], scenario=scenario[1], region=scenario[2] - ) - if input.empty: - break - kaya_factors_frames.append(_calc_gnp_per_p(input)) - kaya_factors_frames.append(_calc_fe_per_gnp(input)) - kaya_factors_frames.append(_calc_pedeq_per_fe(input)) - kaya_factors_frames.append(_calc_peff_per_pedeq(input)) - kaya_factors_frames.append(_calc_tfc_per_peff(input)) - kaya_factors_frames.append(_calc_nfc_per_tfc(input)) - kaya_factors_frames.append( - input.filter( +def kaya_factors(kaya_variables_frame): + kaya_factors = pyam.concat( + [ + _calc_gnp_per_p(kaya_variables_frame), + _calc_fe_per_gnp(kaya_variables_frame), + _calc_pedeq_per_fe(kaya_variables_frame), + _calc_peff_per_pedeq(kaya_variables_frame), + _calc_tfc_per_peff(kaya_variables_frame), + _calc_nfc_per_tfc(kaya_variables_frame), + kaya_variables_frame.filter( variable=[kaya_variable_names.TFC, input_variable_names.POPULATION] - ) - ) - if len(kaya_factors_frames) == 0: - return None - return reduce(lambda x, y: x.append(y), kaya_factors_frames) + ), + ] + ) + return kaya_factors def _calc_gnp_per_p(input_data): diff --git 
a/pyam/kaya/kaya_variables.py b/pyam/kaya/kaya_variables.py index 9fd753e49..b7eb07366 100644 --- a/pyam/kaya/kaya_variables.py +++ b/pyam/kaya/kaya_variables.py @@ -15,7 +15,6 @@ def kaya_variables(input_data): if _is_input_data_incomplete(input_data): - print("returning NONE") return None kaya_variables = pyam.concat( diff --git a/tests/test_feature_kaya_factors.py b/tests/test_feature_kaya_factors.py index c3b7672a6..36101a4d8 100644 --- a/tests/test_feature_kaya_factors.py +++ b/tests/test_feature_kaya_factors.py @@ -83,12 +83,10 @@ def test_kaya_factors(append): if append: obs = TEST_DF.copy() - obs.compute.kaya_factors( - scenarios=[("model_a", "scen_a", "World")], append=True - ) + obs.compute.kaya_factors(append=True) assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) else: - obs = TEST_DF.compute.kaya_factors(scenarios=[("model_a", "scen_a", "World")]) + obs = TEST_DF.compute.kaya_factors() assert_iamframe_equal(EXP_DF, obs) @@ -103,7 +101,7 @@ def test_kaya_variables_none_when_input_variables_missing(append): # select subset of required input variables ( obs.filter(variable=input_variable_names.POPULATION).compute.kaya_factors( - scenarios=[("model_a", "scen_a", "World")], append=True + append=True ) ) # assert that no data was added @@ -111,7 +109,7 @@ def test_kaya_variables_none_when_input_variables_missing(append): else: obs = TEST_DF.filter( variable=input_variable_names.POPULATION - ).compute.kaya_factors(scenarios=[("model_a", "scen_a", "World")]) + ).compute.kaya_factors() assert obs is None @@ -119,6 +117,6 @@ def test_calling_kaya_factors_multiple_times(): """Test calling the method a second time has no effect""" obs = TEST_DF.copy() - obs.compute.kaya_factors(scenarios=[("model_a", "scen_a", "World")], append=True) - obs.compute.kaya_factors(scenarios=[("model_a", "scen_a", "World")], append=True) + obs.compute.kaya_factors(append=True) + obs.compute.kaya_factors(append=True) assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), 
obs) From 491b8ec2202de21c0fa0e2ea8c6758435f419cd4 Mon Sep 17 00:00:00 2001 From: Zachary Schmidt Date: Wed, 4 Dec 2024 15:06:37 -0800 Subject: [PATCH 5/9] change unit conversion for undefined units and make docstring one line --- pyam/compute.py | 6 ++---- pyam/kaya/kaya_factors.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pyam/compute.py b/pyam/compute.py index b3d54f38c..4ee361818 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -252,8 +252,7 @@ def bias(self, name, method, axis): _compute_bias(self._df, name, method, axis) def kaya_variables(self, append=False): - """Create the set of variables needed to compute Kaya factors - for the Kaya Decomposition Analysis. + """Create the set of variables needed to compute Kaya factors. Parameters ---------- @@ -309,8 +308,7 @@ def kaya_variables(self, append=False): return kaya_variables_frame def kaya_factors(self, append=False): - """Compute the Kaya factors needed for the - Kaya Decomposition Analysis. + """Compute the factors for the Kaya Decomposition Analysis Parameters ---------- diff --git a/pyam/kaya/kaya_factors.py b/pyam/kaya/kaya_factors.py index abc626522..081ed077d 100644 --- a/pyam/kaya/kaya_factors.py +++ b/pyam/kaya/kaya_factors.py @@ -76,6 +76,6 @@ def _calc_nfc_per_tfc(input_data): kaya_variable_names.NFC, kaya_variable_names.TFC, kaya_factor_names.NFC_per_TFC, - ignore_units=True, + ignore_units="", append=False, - ).rename(unit={"unknown": ""}) + ) # .rename(unit={"unknown": ""}) From ae90ca846b3dcfaf75150be5afee1e48063c32d4 Mon Sep 17 00:00:00 2001 From: Zachary Schmidt Date: Sun, 8 Dec 2024 16:33:53 -0800 Subject: [PATCH 6/9] use strings instead of module-internal translation for variable names in tests --- tests/test_feature_kaya_factors.py | 75 ++++++++++++------------- tests/test_feature_kaya_variables.py | 82 +++++++++++++--------------- 2 files changed, 72 insertions(+), 85 deletions(-) diff --git a/tests/test_feature_kaya_factors.py 
b/tests/test_feature_kaya_factors.py index 36101a4d8..fa0a14616 100644 --- a/tests/test_feature_kaya_factors.py +++ b/tests/test_feature_kaya_factors.py @@ -2,33 +2,32 @@ import pytest from pyam import IamDataFrame -from pyam.kaya import input_variable_names, kaya_factor_names, kaya_variable_names from pyam.testing import assert_iamframe_equal TEST_DF = IamDataFrame( pd.DataFrame( [ - [input_variable_names.POPULATION, "million", 1000], - [input_variable_names.GDP_PPP, "billion USD_2005/yr", 6], - [input_variable_names.GDP_MER, "billion USD_2005/yr", 5], - [input_variable_names.FINAL_ENERGY, "EJ/yr", 8], - [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 10], - [input_variable_names.PRIMARY_ENERGY_COAL, "EJ/yr", 5], - [input_variable_names.PRIMARY_ENERGY_GAS, "EJ/yr", 2], - [input_variable_names.PRIMARY_ENERGY_OIL, "EJ/yr", 2], + ["Population", "million", 1000], + ["GDP|PPP", "billion USD_2005/yr", 6], + ["GDP|MER", "billion USD_2005/yr", 5], + ["Final Energy", "EJ/yr", 8], + ["Primary Energy", "EJ/yr", 10], + ["Primary Energy|Coal", "EJ/yr", 5], + ["Primary Energy|Gas", "EJ/yr", 2], + ["Primary Energy|Oil", "EJ/yr", 2], [ - input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, + "Emissions|CO2|Fossil Fuels and Industry", "Mt CO2/yr", 10, ], - [input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, "Mt CO2/yr", 1], - [input_variable_names.EMISSIONS_CO2_AFOLU, "Mt CO2/yr", 1], - [input_variable_names.EMISSIONS_CO2_CCS, "Mt CO2/yr", 4], - [input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, "Mt CO2/yr", 1], - [input_variable_names.CCS_FOSSIL_ENERGY, "Mt CO2/yr", 2], - [input_variable_names.CCS_FOSSIL_INDUSTRY, "Mt CO2/yr", 1], - [input_variable_names.CCS_BIOMASS_ENERGY, "Mt CO2/yr", 0.5], - [input_variable_names.CCS_BIOMASS_INDUSTRY, "Mt CO2/yr", 0.5], + ["Emissions|CO2|Industrial Processes", "Mt CO2/yr", 1], + ["Emissions|CO2|AFOLU", "Mt CO2/yr", 1], + ["Emissions|CO2|Carbon Capture and Storage", "Mt CO2/yr", 4], + ["Emissions|CO2|Carbon Capture and 
Storage|Biomass", "Mt CO2/yr", 1], + ["Carbon Sequestration|CCS|Fossil|Energy", "Mt CO2/yr", 2], + ["Carbon Sequestration|CCS|Fossil|Industrial Processes", "Mt CO2/yr", 1], + ["Carbon Sequestration|CCS|Biomass|Energy", "Mt CO2/yr", 0.5], + ["Carbon Sequestration|CCS|Biomass|Industrial Processes", "Mt CO2/yr", 0.5], ], columns=["variable", "unit", 2010], ), @@ -40,14 +39,14 @@ EXP_DF = IamDataFrame( pd.DataFrame( [ - [kaya_factor_names.FE_per_GNP, "EJ / USD / billion", 1.33333], - [kaya_factor_names.GNP_per_P, "USD * billion / million / a", 0.006000], - [kaya_factor_names.NFC_per_TFC, "", 0.833333], - [kaya_factor_names.PEdeq_per_FE, "", 1.250000], - [kaya_factor_names.PEFF_per_PEDEq, "", 0.900000], - [kaya_factor_names.TFC_per_PEFF, "Mt CO2/EJ", 1.333333], - [input_variable_names.POPULATION, "million", 1000], - [kaya_variable_names.TFC, "Mt CO2/yr", 12.0], + ["FE/GNP", "EJ / USD / billion", 1.33333], + ["GNP/P", "USD * billion / million / a", 0.006000], + ["NFC/TFC", "", 0.833333], + ["PEDEq/FE", "", 1.250000], + ["PEFF/PEDEq", "", 0.900000], + ["TFC/PEFF", "Mt CO2/EJ", 1.333333], + ["Population", "million", 1000], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], ], columns=["variable", "unit", 2010], ), @@ -61,13 +60,13 @@ EXP_DF_FOR_APPEND = IamDataFrame( pd.DataFrame( [ - [kaya_factor_names.FE_per_GNP, "EJ / USD / billion", 1.33333], - [kaya_factor_names.GNP_per_P, "USD * billion / million / a", 0.006000], - [kaya_factor_names.NFC_per_TFC, "", 0.833333], - [kaya_factor_names.PEdeq_per_FE, "", 1.250000], - [kaya_factor_names.PEFF_per_PEDEq, "", 0.900000], - [kaya_factor_names.TFC_per_PEFF, "Mt CO2/EJ", 1.333333], - [kaya_variable_names.TFC, "Mt CO2/yr", 12.0], + ["FE/GNP", "EJ / USD / billion", 1.33333], + ["GNP/P", "USD * billion / million / a", 0.006000], + ["NFC/TFC", "", 0.833333], + ["PEDEq/FE", "", 1.250000], + ["PEFF/PEDEq", "", 0.900000], + ["TFC/PEFF", "Mt CO2/EJ", 1.333333], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], ], columns=["variable", "unit", 
2010], ), @@ -99,17 +98,11 @@ def test_kaya_variables_none_when_input_variables_missing(append): if append: obs = TEST_DF.copy() # select subset of required input variables - ( - obs.filter(variable=input_variable_names.POPULATION).compute.kaya_factors( - append=True - ) - ) + (obs.filter(variable="Population").compute.kaya_factors(append=True)) # assert that no data was added assert_iamframe_equal(TEST_DF, obs) else: - obs = TEST_DF.filter( - variable=input_variable_names.POPULATION - ).compute.kaya_factors() + obs = TEST_DF.filter(variable="Population").compute.kaya_factors() assert obs is None diff --git a/tests/test_feature_kaya_variables.py b/tests/test_feature_kaya_variables.py index e05ffd8b2..5e785b517 100644 --- a/tests/test_feature_kaya_variables.py +++ b/tests/test_feature_kaya_variables.py @@ -1,35 +1,35 @@ +import logging + import pandas as pd import pytest -import logging from pyam import IamDataFrame -from pyam.kaya import input_variable_names, kaya_variable_names from pyam.testing import assert_iamframe_equal TEST_DF = IamDataFrame( pd.DataFrame( [ - [input_variable_names.POPULATION, "million", 1000], - [input_variable_names.GDP_PPP, "billion USD_2005/yr", 6], - [input_variable_names.GDP_MER, "billion USD_2005/yr", 5], - [input_variable_names.FINAL_ENERGY, "EJ/yr", 8], - [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 10], - [input_variable_names.PRIMARY_ENERGY_COAL, "EJ/yr", 5], - [input_variable_names.PRIMARY_ENERGY_GAS, "EJ/yr", 2], - [input_variable_names.PRIMARY_ENERGY_OIL, "EJ/yr", 2], + ["Population", "million", 1000], + ["GDP|PPP", "billion USD_2005/yr", 6], + ["GDP|MER", "billion USD_2005/yr", 5], + ["Final Energy", "EJ/yr", 8], + ["Primary Energy", "EJ/yr", 10], + ["Primary Energy|Coal", "EJ/yr", 5], + ["Primary Energy|Gas", "EJ/yr", 2], + ["Primary Energy|Oil", "EJ/yr", 2], [ - input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, + "Emissions|CO2|Fossil Fuels and Industry", "Mt CO2/yr", 10, ], - 
[input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, "Mt CO2/yr", 1], - [input_variable_names.EMISSIONS_CO2_AFOLU, "Mt CO2/yr", 1], - [input_variable_names.EMISSIONS_CO2_CCS, "Mt CO2/yr", 4], - [input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, "Mt CO2/yr", 1], - [input_variable_names.CCS_FOSSIL_ENERGY, "Mt CO2/yr", 2], - [input_variable_names.CCS_FOSSIL_INDUSTRY, "Mt CO2/yr", 1], - [input_variable_names.CCS_BIOMASS_ENERGY, "Mt CO2/yr", 0.5], - [input_variable_names.CCS_BIOMASS_INDUSTRY, "Mt CO2/yr", 0.5], + ["Emissions|CO2|Industrial Processes", "Mt CO2/yr", 1], + ["Emissions|CO2|AFOLU", "Mt CO2/yr", 1], + ["Emissions|CO2|Carbon Capture and Storage", "Mt CO2/yr", 4], + ["Emissions|CO2|Carbon Capture and Storage|Biomass", "Mt CO2/yr", 1], + ["Carbon Sequestration|CCS|Fossil|Energy", "Mt CO2/yr", 2], + ["Carbon Sequestration|CCS|Fossil|Industrial Processes", "Mt CO2/yr", 1], + ["Carbon Sequestration|CCS|Biomass|Energy", "Mt CO2/yr", 0.5], + ["Carbon Sequestration|CCS|Biomass|Industrial Processes", "Mt CO2/yr", 0.5], ], columns=["variable", "unit", 2010], ), @@ -41,13 +41,13 @@ EXP_DF = IamDataFrame( pd.DataFrame( [ - [input_variable_names.POPULATION, "million", 1000], - [input_variable_names.GDP_PPP, "billion USD_2005/yr", 6], - [input_variable_names.FINAL_ENERGY, "EJ/yr", 8.0], - [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 10.0], - [kaya_variable_names.PRIMARY_ENERGY_FF, "EJ/yr", 9.0], - [kaya_variable_names.TFC, "Mt CO2/yr", 12.0], - [kaya_variable_names.NFC, "Mt CO2/yr", 10.0], + ["Population", "million", 1000], + ["GDP|PPP", "billion USD_2005/yr", 6], + ["Final Energy", "EJ/yr", 8.0], + ["Primary Energy", "EJ/yr", 10.0], + ["Primary Energy|Fossil", "EJ/yr", 9.0], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], + ["Net Fossil Carbon", "Mt CO2/yr", 10.0], ], columns=["variable", "unit", 2010], ), @@ -61,9 +61,9 @@ EXP_DF_FOR_APPEND = IamDataFrame( pd.DataFrame( [ - [kaya_variable_names.PRIMARY_ENERGY_FF, "EJ/yr", 9.0], - [kaya_variable_names.TFC, "Mt CO2/yr", 
12.0], - [kaya_variable_names.NFC, "Mt CO2/yr", 10.0], + ["Primary Energy|Fossil", "EJ/yr", 9.0], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], + ["Net Fossil Carbon", "Mt CO2/yr", 10.0], ], columns=["variable", "unit", 2010], ), @@ -95,17 +95,11 @@ def test_kaya_variables_none_when_input_variables_missing(append): if append: obs = TEST_DF.copy() # select subset of required input variables - ( - obs.filter(variable=input_variable_names.POPULATION).compute.kaya_variables( - append=True - ) - ) + (obs.filter(variable="Population").compute.kaya_variables(append=True)) # assert that no data was added assert_iamframe_equal(TEST_DF, obs) else: - obs = TEST_DF.filter( - variable=input_variable_names.POPULATION - ).compute.kaya_variables() + obs = TEST_DF.filter(variable="Population").compute.kaya_variables() assert obs is None @@ -121,7 +115,7 @@ def test_calling_kaya_variables_multiple_times(): def test_kaya_variables_logs_missing_variables(caplog): """Test that missing variables are correctly logged""" # Create test data with only population - df_no_pop = TEST_DF.filter(variable=input_variable_names.POPULATION, keep=False) + df_no_pop = TEST_DF.filter(variable="Population", keep=False) with caplog.at_level(logging.INFO): df_no_pop.compute.kaya_variables() @@ -132,18 +126,18 @@ def test_kaya_variables_logs_missing_variables(caplog): in caplog.text ) - assert input_variable_names.POPULATION in caplog.text + assert "Population" in caplog.text def test_kaya_variables_uses_gdp_mer_fallback(): """Test that kaya_variables uses GDP_MER when GDP_PPP is not available""" # Create test data without GDP_PPP - df_no_gdp_ppp = TEST_DF.filter(variable=input_variable_names.GDP_PPP, keep=False) + df_no_gdp_ppp = TEST_DF.filter(variable="GDP|PPP", keep=False) # Create expected result without GDP_MER instead of GDP_PPP - exp_no_gdp_ppp = EXP_DF.filter( - variable=input_variable_names.GDP_PPP, keep=False - ).append(TEST_DF.filter(variable=input_variable_names.GDP_MER)) + exp_no_gdp_ppp = 
EXP_DF.filter(variable="GDP|PPP", keep=False).append( + TEST_DF.filter(variable="GDP|MER") + ) # Compute kaya variables obs = df_no_gdp_ppp.compute.kaya_variables() @@ -157,7 +151,7 @@ def test_kaya_variables_returns_none_when_no_gdp_available(): GDP_MER and GDP_PPP are unavailable""" # Create test data without GDP_PPP df_no_gdp = TEST_DF.filter( - variable=[input_variable_names.GDP_PPP, input_variable_names.GDP_MER], + variable=["GDP|PPP", "GDP|MER"], keep=False, ) From e0ef67d5fe4cdbabd84fd12b35b33851fdc8e042 Mon Sep 17 00:00:00 2001 From: Zachary Schmidt Date: Sun, 8 Dec 2024 20:19:47 -0800 Subject: [PATCH 7/9] fix line length and spacing --- pyam/compute.py | 2 -- pyam/kaya/kaya_variables.py | 13 ++++++++----- tests/test_feature_kaya_variables.py | 7 +++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pyam/compute.py b/pyam/compute.py index 4ee361818..fbe35da1b 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -349,7 +349,6 @@ def kaya_factors(self, append=False): - Carbon Sequestration|CCS|Fossil|Industrial Processes - Carbon Sequestration|CCS|Biomass|Energy - Carbon Sequestration|CCS|Biomass|Industrial Processes - """ kaya_variables = self.kaya_variables(append=False) if kaya_variables is None: @@ -430,7 +429,6 @@ def kaya_lmdi(self, ref_scenario, int_scenario, append=False): model_a::model_a scen_a::scen_b World::World PEFF/PEDEq (LMDI) unknown 2010 0.000000 model_a::model_a scen_a::scen_b World::World Population (LMDI) unknown 2010 0.000000 model_a::model_a scen_a::scen_b World::World TFC/PEFF (LMDI) unknown 2010 4.853221 - """ valid_ref_and_int_scenarios = _validate_kaya_scenario_args( scenarios=[ref_scenario, int_scenario] diff --git a/pyam/kaya/kaya_variables.py b/pyam/kaya/kaya_variables.py index b7eb07366..370db6f0d 100644 --- a/pyam/kaya/kaya_variables.py +++ b/pyam/kaya/kaya_variables.py @@ -57,8 +57,10 @@ def _is_input_data_incomplete(input_data): missing_variables = set(required_variables_set) - single_combination_variables 
if missing_variables: logger.info( - f"Variables missing for model: {row['model']}, scenario: {row['scenario']}, region: {row['region']}:" - f"\n{missing_variables}" + f"""Variables missing for + model: {row['model']}, + scenario: {row['scenario']}, + region: {row['region']}\nMissing variables: {missing_variables}""" ) # special case for GDP: either form is acceptable, so don't check for either @@ -66,9 +68,10 @@ def _is_input_data_incomplete(input_data): required_variables_set = make_required_variables_set( set(input_data.data["variable"].unique()) ) - # exclude model/scenario combinations that have missing variables, disregarding region - # even if all variables are not present for a region, arithmetic operations - # will return an empty dataframe, not throw an error, so it is safe to proceed + # exclude model/scenario combinations that have missing variables, + # disregarding region. even if all variables are not present for a region, + # arithmetic operations will return an empty dataframe, + # not throw an error, so it is safe to proceed input_data.require_data(variable=list(required_variables_set), exclude_on_fail=True) # supress warning about empty dataframe if filtering excludes all scenarios with warnings.catch_warnings(): diff --git a/tests/test_feature_kaya_variables.py b/tests/test_feature_kaya_variables.py index 5e785b517..d520cccea 100644 --- a/tests/test_feature_kaya_variables.py +++ b/tests/test_feature_kaya_variables.py @@ -121,10 +121,9 @@ def test_kaya_variables_logs_missing_variables(caplog): df_no_pop.compute.kaya_variables() # Check that the log message contains expected information - assert ( - "Variables missing for model: model_a, scenario: scen_a, region: World" - in caplog.text - ) + assert "model: model_a" in caplog.text + assert "scenario: scen_a" in caplog.text + assert "region: World" in caplog.text assert "Population" in caplog.text From c9438db07e1c906e81cb417854a76d56b541ff05 Mon Sep 17 00:00:00 2001 From: Zachary Schmidt Date: 
Sun, 8 Dec 2024 21:13:33 -0800 Subject: [PATCH 8/9] remove lmdi feature --- pyam/compute.py | 109 +--------- pyam/kaya/lmdi.py | 362 -------------------------------- pyam/kaya/lmdi_names.py | 7 - tests/test_feature_kaya_lmdi.py | 157 -------------- 4 files changed, 1 insertion(+), 634 deletions(-) delete mode 100644 pyam/kaya/lmdi.py delete mode 100644 pyam/kaya/lmdi_names.py delete mode 100644 tests/test_feature_kaya_lmdi.py diff --git a/pyam/compute.py b/pyam/compute.py index fbe35da1b..fdac1f20f 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -7,7 +7,7 @@ import pyam from pyam._debiasing import _compute_bias from pyam.index import replace_index_values -from pyam.kaya import kaya_factors, kaya_variables, lmdi +from pyam.kaya import kaya_factors, kaya_variables from pyam.timeseries import growth_rate from pyam.utils import remove_from_list @@ -362,113 +362,6 @@ def kaya_factors(self, append=False): ) return kaya_factors_frame - def kaya_lmdi(self, ref_scenario, int_scenario, append=False): - """Calculate the logarithmic mean Divisia index (LMDI) decomposition - using Kaya factors. - - Parameters - ---------- - ref_scenario : tuple of strings (model, scenario, region) - The (model, scenario, region) to be used as the reference scenario - in the LMDI calculation. - int_scenario : tuple of strings (model, scenario, region) - The (model, scenario, region) to be used as the intervention scenario - in the LMDI calculation. - append : bool, optional - Whether to append computed timeseries data to this instance. - - Returns - ------- - :class:`IamDataFrame` or **None** - Computed timeseries data or None if `append=True`. - - Notes - ----- - - Example of calling the method: - - .. code-block:: python - - df.compute.kaya_lmdi( - ref_scenario=("model_a", "scenario_a", "region_a"), - int_scenario=("model_b", "scenario_b", "region_b"), - append=True, - ) - - The IamDataFrame must contain the following variables, otherwise the method - will return None: - .. 
list-table:: - - Required Variables - - Population - - GDP (MER or PPP) - - Final Energy - - Primary Energy - - Primary Energy|Coal - - Primary Energy|Oil - - Primary Energy|Gas - - Emissions|CO2|Industrial Processes - - Emissions|CO2|Carbon Capture and Storage - - Emissions|CO2|Carbon Capture and Storage|Biomass - - Emissions|CO2|Fossil Fuels and Industry - - Emissions|CO2|AFOLU - - Carbon Sequestration|CCS|Fossil|Energy - - Carbon Sequestration|CCS|Fossil|Industrial Processes - - Carbon Sequestration|CCS|Biomass|Energy - - Carbon Sequestration|CCS|Biomass|Industrial Processes - - The model, scenario, and region fields for the results dataframe will be - concatenated values from the reference and intervention scenarios in the - form reference_scenario_value::intervention_scenario_value. - - Example results data: - - model scenario region variable unit year value - model_a::model_a scen_a::scen_b World::World FE/GNP (LMDI) unknown 2010 1.321788 - model_a::model_a scen_a::scen_b World::World GNP/P (LMDI) unknown 2010 0.000000 - model_a::model_a scen_a::scen_b World::World PEDEq/FE (LMDI) unknown 2010 0.816780 - model_a::model_a scen_a::scen_b World::World PEFF/PEDEq (LMDI) unknown 2010 0.000000 - model_a::model_a scen_a::scen_b World::World Population (LMDI) unknown 2010 0.000000 - model_a::model_a scen_a::scen_b World::World TFC/PEFF (LMDI) unknown 2010 4.853221 - """ - valid_ref_and_int_scenarios = _validate_kaya_scenario_args( - scenarios=[ref_scenario, int_scenario] - ) - # we must have two different scenarios to calculate kaya_lmdi - if (valid_ref_and_int_scenarios is None) or ( - len(valid_ref_and_int_scenarios) != 2 - ): - return None - kaya_factors = self.kaya_factors(valid_ref_and_int_scenarios, append=False) - if kaya_factors is None: - return None - kaya_lmdi_frame = lmdi.corrected_lmdi(kaya_factors, ref_scenario, int_scenario) - if kaya_lmdi_frame is None: - return None - if append: - self._df.append( - _find_non_duplicate_rows(self._df, 
kaya_lmdi_frame), inplace=True - ) - return kaya_lmdi_frame - - -def _validate_kaya_scenario_args(scenarios): - validated_scenarios = [] - for scenario in scenarios: - if (len(scenario) == 3) and _kaya_args_are_strings(scenario): - validated_scenarios.append(scenario) - # don't recalculate for identical scenarios - unique_scenarios = set(scenarios) - if len(unique_scenarios) == 0: - return None - return validated_scenarios - - -def _kaya_args_are_strings(scenario): - for arg in scenario: - if not isinstance(arg, str): - return False - return True - def _find_non_duplicate_rows(original_df, variables_to_add): variables_for_append = pyam.IamDataFrame( diff --git a/pyam/kaya/lmdi.py b/pyam/kaya/lmdi.py deleted file mode 100644 index 62f89ceda..000000000 --- a/pyam/kaya/lmdi.py +++ /dev/null @@ -1,362 +0,0 @@ -from functools import reduce - -import numpy as np -import pandas as pd - -import pyam -from pyam.kaya import ( - input_variable_names, - kaya_factor_names, - kaya_variable_names, - lmdi_names, -) - - -def corrected_lmdi(kaya_factors, ref_scenario, int_scenario): - ref_input = ( - kaya_factors.filter( - model=ref_scenario[0], scenario=ref_scenario[1], region=ref_scenario[2] - ) - .as_pandas() - .assign(scenario_class="reference") - ) - int_input = ( - kaya_factors.filter( - model=int_scenario[0], scenario=int_scenario[1], region=int_scenario[2] - ) - .as_pandas() - .assign(scenario_class="intervention") - ) - input = pyam.IamDataFrame(pd.concat([ref_input, int_input])) - - uncorrected = _uncorrected_lmdi(input) - non_neg = _lmdi_non_neg(uncorrected) - total_non_neg = _sum_lmdi_non_neg(non_neg) - total_w_neg = _tfc_diff(input) - difference = total_non_neg.append(total_w_neg).subtract( - "total_no_neg", "tfc_diff", "difference", append=False, ignore_units=True - ) - - lmdi_frames = [] - p_percent = _calc_percent_of_total_for_one_term( - non_neg, lmdi_names.Pop_LMDI, total_non_neg - ) - p_correction = p_percent.append(difference).multiply( - lmdi_names.Pop_LMDI, 
"difference", "correction", ignore_units=True - ) - p_corrected = p_correction.append(non_neg).add( - lmdi_names.Pop_LMDI, "correction", lmdi_names.Pop_LMDI, ignore_units=True - ) - lmdi_frames.append(p_corrected) - - gnp_per_p_percent = _calc_percent_of_total_for_one_term( - non_neg, lmdi_names.GNP_per_P_LMDI, total_non_neg - ) - gnp_per_p_correction = gnp_per_p_percent.append(difference).multiply( - lmdi_names.GNP_per_P_LMDI, "difference", "correction", ignore_units=True - ) - gnp_per_p_corrected = gnp_per_p_correction.append(non_neg).add( - lmdi_names.GNP_per_P_LMDI, - "correction", - lmdi_names.GNP_per_P_LMDI, - ignore_units=True, - ) - lmdi_frames.append(gnp_per_p_corrected) - - fe_per_gnp_percent = _calc_percent_of_total_for_one_term( - non_neg, lmdi_names.FE_per_GNP_LMDI, total_non_neg - ) - fe_per_gnp_correction = fe_per_gnp_percent.append(difference).multiply( - lmdi_names.FE_per_GNP_LMDI, "difference", "correction", ignore_units=True - ) - fe_per_gnp_corrected = fe_per_gnp_correction.append(non_neg).add( - lmdi_names.FE_per_GNP_LMDI, - "correction", - lmdi_names.FE_per_GNP_LMDI, - ignore_units=True, - ) - lmdi_frames.append(fe_per_gnp_corrected) - - pedeq_per_fe_percent = _calc_percent_of_total_for_one_term( - non_neg, lmdi_names.PEdeq_per_FE_LMDI, total_non_neg - ) - pedeq_per_fe_correction = pedeq_per_fe_percent.append(difference).multiply( - lmdi_names.PEdeq_per_FE_LMDI, "difference", "correction", ignore_units=True - ) - pedeq_per_fe_corrected = pedeq_per_fe_correction.append(non_neg).add( - lmdi_names.PEdeq_per_FE_LMDI, - "correction", - lmdi_names.PEdeq_per_FE_LMDI, - ignore_units=True, - ) - lmdi_frames.append(pedeq_per_fe_corrected) - - peff_per_pedeq_percent = _calc_percent_of_total_for_one_term( - non_neg, lmdi_names.PEFF_per_PEDEq_LMDI, total_non_neg - ) - peff_per_pedeq_correction = peff_per_pedeq_percent.append(difference).multiply( - lmdi_names.PEFF_per_PEDEq_LMDI, "difference", "correction", ignore_units=True - ) - peff_per_pedeq_corrected 
= peff_per_pedeq_correction.append(non_neg).add( - lmdi_names.PEFF_per_PEDEq_LMDI, - "correction", - lmdi_names.PEFF_per_PEDEq_LMDI, - ignore_units=True, - ) - lmdi_frames.append(peff_per_pedeq_corrected) - - tfc_per_peff_percent = _calc_percent_of_total_for_one_term( - non_neg, lmdi_names.TFC_per_PEFF_LMDI, total_non_neg - ) - tfc_per_peff_correction = tfc_per_peff_percent.append(difference).multiply( - lmdi_names.TFC_per_PEFF_LMDI, "difference", "correction", ignore_units=True - ) - tfc_per_peff_corrected = tfc_per_peff_correction.append(non_neg).add( - lmdi_names.TFC_per_PEFF_LMDI, - "correction", - lmdi_names.TFC_per_PEFF_LMDI, - ignore_units=True, - ) - lmdi_frames.append(tfc_per_peff_corrected) - - full_lmdi = reduce(lambda x, y: x.append(y), lmdi_frames) - full_lmdi_no_scenario_class_column = pyam.IamDataFrame( - full_lmdi.as_pandas().drop(columns="scenario_class") - ) - return full_lmdi_no_scenario_class_column - - -def _lmdi_non_neg(uncorrected): - p_non_neg = _calc_one_non_negative_term(uncorrected, lmdi_names.Pop_LMDI) - gnp_per_p_non_neg = _calc_one_non_negative_term( - uncorrected, lmdi_names.GNP_per_P_LMDI - ) - fe_per_gnp_non_neg = _calc_one_non_negative_term( - uncorrected, lmdi_names.FE_per_GNP_LMDI - ) - pedeq_per_fe_non_neg = _calc_one_non_negative_term( - uncorrected, lmdi_names.PEdeq_per_FE_LMDI - ) - peff_per_pedeq_non_neg = _calc_one_non_negative_term( - uncorrected, lmdi_names.PEFF_per_PEDEq_LMDI - ) - tfc_per_peff_non_neg = _calc_one_non_negative_term( - uncorrected, lmdi_names.TFC_per_PEFF_LMDI - ) - - return ( - p_non_neg.append(gnp_per_p_non_neg) - .append(fe_per_gnp_non_neg) - .append(pedeq_per_fe_non_neg) - .append(peff_per_pedeq_non_neg) - .append(tfc_per_peff_non_neg) - ) - - -def _sum_lmdi_non_neg(lmdi_non_neg): - lmdi_non_neg.add( - lmdi_names.Pop_LMDI, - lmdi_names.GNP_per_P_LMDI, - "sum_to_GNP_per_P_LMDI", - append=True, - ignore_units=True, - ) - lmdi_non_neg.add( - "sum_to_GNP_per_P_LMDI", - lmdi_names.FE_per_GNP_LMDI, - 
"sum_to_FE_per_GNP_LMDI", - append=True, - ignore_units=True, - ) - lmdi_non_neg.add( - "sum_to_FE_per_GNP_LMDI", - lmdi_names.PEdeq_per_FE_LMDI, - "sum_to_PEdeq_per_FE_LMDI", - append=True, - ignore_units=True, - ) - lmdi_non_neg.add( - "sum_to_PEdeq_per_FE_LMDI", - lmdi_names.PEFF_per_PEDEq_LMDI, - "sum_to_PEFF_per_PEDEq_LMDI", - append=True, - ignore_units=True, - ) - return lmdi_non_neg.add( - "sum_to_PEFF_per_PEDEq_LMDI", - lmdi_names.TFC_per_PEFF_LMDI, - "total_no_neg", - append=False, - ignore_units=True, - ) - - -def _calc_percent_of_total_for_one_term(non_neg, lmdi_term_name, tfc_diff): - return non_neg.append(tfc_diff).divide( - lmdi_term_name, "total_no_neg", lmdi_term_name, ignore_units=True - ) - - -def _tfc_diff(kaya_factors): - - (combined_model_name, combined_scenario_name, combined_region_name) = ( - _make_combined_scenario_name(kaya_factors.as_pandas()) - ) - tfc = ( - kaya_factors.filter( - variable=kaya_variable_names.TFC, scenario_class="reference" - ) - .rename(variable={kaya_variable_names.TFC: "tfc_ref"}) - .append( - kaya_factors.filter( - variable=kaya_variable_names.TFC, scenario_class="intervention" - ) - ) - ) - tfc = pyam.IamDataFrame( - tfc.as_pandas() - .assign(scenario_class="LMDI") - .assign( - model=combined_model_name, - scenario=combined_scenario_name, - region=combined_region_name, - ) - ) - return tfc.subtract( - "tfc_ref", kaya_variable_names.TFC, "tfc_diff", ignore_units=True - ) - - -def _calc_one_non_negative_term(uncorrected_lmdi, lmdi_term_name): - return uncorrected_lmdi.apply( - _remove_negative, lmdi_term_name, args=[lmdi_term_name], ignore_units=True - ) - - -def _remove_negative(lmdi_term): - return lmdi_term.clip(lower=0) - - -def _uncorrected_lmdi(kaya_factors): - - p = _calc_one_lmdi_term( - kaya_factors, input_variable_names.POPULATION, lmdi_names.Pop_LMDI - ) - gnp_per_p = _calc_one_lmdi_term( - kaya_factors, kaya_factor_names.GNP_per_P, lmdi_names.GNP_per_P_LMDI - ) - fe_per_gnp = _calc_one_lmdi_term( - 
kaya_factors, kaya_factor_names.FE_per_GNP, lmdi_names.FE_per_GNP_LMDI - ) - pe_deq_per_fe = _calc_one_lmdi_term( - kaya_factors, kaya_factor_names.PEdeq_per_FE, lmdi_names.PEdeq_per_FE_LMDI - ) - peff_per_pe_deq = _calc_one_lmdi_term( - kaya_factors, kaya_factor_names.PEFF_per_PEDEq, lmdi_names.PEFF_per_PEDEq_LMDI - ) - tfc_per_peff = _calc_one_lmdi_term( - kaya_factors, kaya_factor_names.TFC_per_PEFF, lmdi_names.TFC_per_PEFF_LMDI - ) - return ( - p.append(gnp_per_p) - .append(fe_per_gnp) - .append(pe_deq_per_fe) - .append(peff_per_pe_deq) - .append(tfc_per_peff) - ) - - -def _calc_one_lmdi_term( - input_data, - kaya_factor_name, - lmdi_term_name, - kaya_product_name=kaya_variable_names.TFC, -): - return input_data.apply( - _lmdi, - lmdi_term_name, - axis="variable", - args=[kaya_factor_name, kaya_product_name], - ignore_units=True, - ) - - -def _lmdi(kaya_factor, kaya_product): - - (combined_model_name, combined_scenario_name, combined_region_name) = ( - _make_combined_scenario_name(kaya_factor) - ) - - factor_ref = ( - kaya_factor.reset_index() - .query('scenario_class == "reference"') - .assign( - model=combined_model_name, - scenario=combined_scenario_name, - region=combined_region_name, - ) - .assign(scenario_class="LMDI") - .set_index(list(kaya_factor.reset_index().columns[:-1])) - .rename(columns=lambda x: "value") - ) - - factor_int = ( - kaya_factor.reset_index() - .query('scenario_class == "intervention"') - .assign( - model=combined_model_name, - scenario=combined_scenario_name, - region=combined_region_name, - ) - .assign(scenario_class="LMDI") - .set_index(list(kaya_factor.reset_index().columns[:-1])) - .rename(columns=lambda x: "value") - ) - tfc_ref = ( - kaya_product.reset_index() - .query('scenario_class == "reference"') - .assign( - model=combined_model_name, - scenario=combined_scenario_name, - region=combined_region_name, - ) - .assign(scenario_class="LMDI") - .set_index(list(kaya_factor.reset_index().columns[:-1])) - .rename(columns=lambda x: 
"value") - ) - tfc_int = ( - kaya_product.reset_index() - .query('scenario_class == "intervention"') - .assign( - model=combined_model_name, - scenario=combined_scenario_name, - region=combined_region_name, - ) - .assign(scenario_class="LMDI") - .set_index(list(kaya_factor.reset_index().columns[:-1])) - .rename(columns=lambda x: "value") - ) - return ( - ((tfc_ref - tfc_int) / (np.log(tfc_ref) - np.log(tfc_int))) - * (np.log(factor_ref / factor_int)) - ).squeeze(axis=1) - - -def _make_combined_scenario_name(kaya_factor): - ref = kaya_factor.reset_index().query('scenario_class == "reference"') - int = kaya_factor.reset_index().query('scenario_class == "intervention"') - - ref_model_name = ref.model.values[0] - int_model_name = int.model.values[0] - - ref_scenario_name = ref.scenario.values[0] - int_scenario_name = int.scenario.values[0] - - ref_region_name = ref.region.values[0] - int_region_name = int.region.values[0] - - return ( - ref_model_name + "::" + int_model_name, - ref_scenario_name + "::" + int_scenario_name, - ref_region_name + "::" + int_region_name, - ) diff --git a/pyam/kaya/lmdi_names.py b/pyam/kaya/lmdi_names.py deleted file mode 100644 index 4f47a5c7c..000000000 --- a/pyam/kaya/lmdi_names.py +++ /dev/null @@ -1,7 +0,0 @@ -Pop_LMDI = "Population (LMDI)" -GNP_per_P_LMDI = "GNP/P (LMDI)" -FE_per_GNP_LMDI = "FE/GNP (LMDI)" -PEdeq_per_FE_LMDI = "PEDEq/FE (LMDI)" -PEFF_per_PEDEq_LMDI = "PEFF/PEDEq (LMDI)" -TFC_per_PEFF_LMDI = "TFC/PEFF (LMDI)" -NFC_per_TFC_LMDI = "NFC/TFC (LMDI)" diff --git a/tests/test_feature_kaya_lmdi.py b/tests/test_feature_kaya_lmdi.py deleted file mode 100644 index 87967f909..000000000 --- a/tests/test_feature_kaya_lmdi.py +++ /dev/null @@ -1,157 +0,0 @@ -import pandas as pd -import pytest - -from pyam import IamDataFrame -from pyam.kaya import input_variable_names, lmdi_names -from pyam.testing import assert_iamframe_equal - -TEST_DF = IamDataFrame( - pd.DataFrame( - [ - [input_variable_names.POPULATION, "million", 1000], - 
[input_variable_names.GDP_PPP, "billion USD_2005/yr", 6], - [input_variable_names.GDP_MER, "billion USD_2005/yr", 5], - [input_variable_names.FINAL_ENERGY, "EJ/yr", 8], - [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 10], - [input_variable_names.PRIMARY_ENERGY_COAL, "EJ/yr", 5], - [input_variable_names.PRIMARY_ENERGY_GAS, "EJ/yr", 2], - [input_variable_names.PRIMARY_ENERGY_OIL, "EJ/yr", 2], - [ - input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, - "Mt CO2/yr", - 10, - ], - [input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, "Mt CO2/yr", 1], - [input_variable_names.EMISSIONS_CO2_AFOLU, "Mt CO2/yr", 1], - [input_variable_names.EMISSIONS_CO2_CCS, "Mt CO2/yr", 4], - [input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, "Mt CO2/yr", 1], - [input_variable_names.CCS_FOSSIL_ENERGY, "Mt CO2/yr", 2], - [input_variable_names.CCS_FOSSIL_INDUSTRY, "Mt CO2/yr", 1], - [input_variable_names.CCS_BIOMASS_ENERGY, "Mt CO2/yr", 0.5], - [input_variable_names.CCS_BIOMASS_INDUSTRY, "Mt CO2/yr", 0.5], - ], - columns=["variable", "unit", 2010], - ), - model="model_a", - scenario="scen_a", - region="World", -).append( - IamDataFrame( - pd.DataFrame( - [ - [input_variable_names.POPULATION, "million", 1001], - [input_variable_names.GDP_PPP, "billion USD_2005/yr", 7], - [input_variable_names.GDP_MER, "billion USD_2005/yr", 6], - [input_variable_names.FINAL_ENERGY, "EJ/yr", 9], - [input_variable_names.PRIMARY_ENERGY, "EJ/yr", 11], - [input_variable_names.PRIMARY_ENERGY_COAL, "EJ/yr", 6], - [input_variable_names.PRIMARY_ENERGY_GAS, "EJ/yr", 3], - [input_variable_names.PRIMARY_ENERGY_OIL, "EJ/yr", 3], - [ - input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, - "Mt CO2/yr", - 13, - ], - [ - input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, - "Mt CO2/yr", - 2, - ], - [input_variable_names.EMISSIONS_CO2_AFOLU, "Mt CO2/yr", 2], - [input_variable_names.EMISSIONS_CO2_CCS, "Mt CO2/yr", 5], - [input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, "Mt CO2/yr", 2], - 
[input_variable_names.CCS_FOSSIL_ENERGY, "Mt CO2/yr", 3], - [input_variable_names.CCS_FOSSIL_INDUSTRY, "Mt CO2/yr", 2], - [input_variable_names.CCS_BIOMASS_ENERGY, "Mt CO2/yr", 1.5], - [input_variable_names.CCS_BIOMASS_INDUSTRY, "Mt CO2/yr", 1.5], - ], - columns=["variable", "unit", 2010], - ), - model="model_a", - scenario="scen_b", - region="World", - ) -) - - -EXP_DF = IamDataFrame( - pd.DataFrame( - [ - [lmdi_names.FE_per_GNP_LMDI, "unknown", 1.321788], - [lmdi_names.GNP_per_P_LMDI, "unknown", 0], - [lmdi_names.PEdeq_per_FE_LMDI, "unknown", 0.816780], - [lmdi_names.PEFF_per_PEDEq_LMDI, "unknown", 0], - [lmdi_names.Pop_LMDI, "unknown", 0], - [lmdi_names.TFC_per_PEFF_LMDI, "unknown", 4.853221], - ], - columns=["variable", "unit", 2010], - ), - model="model_a::model_a", - scenario="scen_a::scen_b", - region="World::World", -) - - -@pytest.mark.parametrize("append", (False, True)) -def test_kaya_lmdi(append): - """Test computing kaya LMDI""" - - if append: - obs = TEST_DF.copy() - obs.compute.kaya_lmdi( - ref_scenario=("model_a", "scen_a", "World"), - int_scenario=("model_a", "scen_b", "World"), - append=True, - ) - assert_iamframe_equal(TEST_DF.append(EXP_DF), obs) - else: - obs = TEST_DF.compute.kaya_lmdi( - ref_scenario=("model_a", "scen_a", "World"), - int_scenario=("model_a", "scen_b", "World"), - ) - assert_iamframe_equal(EXP_DF, obs) - - -@pytest.mark.parametrize("append", (False, True)) -def test_kaya_lmdi_none_when_input_variables_missing(append): - """Assert that computing kaya LMDI with - missing input variables returns None - """ - - if append: - obs = TEST_DF.copy() - # select subset of required input variables - ( - obs.filter(variable=input_variable_names.POPULATION).compute.kaya_lmdi( - ref_scenario=("model_a", "scen_a", "World"), - int_scenario=("model_a", "scen_b", "World"), - append=True, - ) - ) - # assert that no data was added - assert_iamframe_equal(TEST_DF, obs) - else: - obs = TEST_DF.filter( - variable=input_variable_names.POPULATION - 
).compute.kaya_lmdi( - ref_scenario=("model_a", "scen_a", "World"), - int_scenario=("model_a", "scen_b", "World"), - ) - assert obs is None - - -def test_calling_kaya_lmdi_multiple_times(): - """Test calling the method a second time has no effect""" - - obs = TEST_DF.copy() - obs.compute.kaya_lmdi( - ref_scenario=("model_a", "scen_a", "World"), - int_scenario=("model_a", "scen_b", "World"), - append=True, - ) - obs.compute.kaya_lmdi( - ref_scenario=("model_a", "scen_a", "World"), - int_scenario=("model_a", "scen_b", "World"), - append=True, - ) - assert_iamframe_equal(TEST_DF.append(EXP_DF), obs) From 126283af08ee6e28d97389c1f27ea56aad7c6da3 Mon Sep 17 00:00:00 2001 From: Zachary Schmidt Date: Fri, 27 Jun 2025 12:01:22 -0700 Subject: [PATCH 9/9] Change the main functions in the kaya_factors and kaya_variables modules to compute_kaya_factors and compute_kaya_variables. Also add test case for GDP (MER) fallback for Kaya Factors calculation. --- pyam/compute.py | 4 ++-- pyam/kaya/kaya_factors.py | 2 +- pyam/kaya/kaya_variables.py | 2 +- tests/test_feature_kaya_factors.py | 32 ++++++++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/pyam/compute.py b/pyam/compute.py index fdac1f20f..5e0729e1b 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -296,7 +296,7 @@ def kaya_variables(self, append=False): """ - kaya_variables_frame = kaya_variables.kaya_variables(self._df) + kaya_variables_frame = kaya_variables.compute_kaya_variables(self._df) if kaya_variables_frame is None: return None if append: @@ -353,7 +353,7 @@ def kaya_factors(self, append=False): kaya_variables = self.kaya_variables(append=False) if kaya_variables is None: return None - kaya_factors_frame = kaya_factors.kaya_factors(kaya_variables) + kaya_factors_frame = kaya_factors.compute_kaya_factors(kaya_variables) if kaya_factors_frame is None: return None if append: diff --git a/pyam/kaya/kaya_factors.py b/pyam/kaya/kaya_factors.py index 081ed077d..fa7c0a87e 100644 ---
a/pyam/kaya/kaya_factors.py +++ b/pyam/kaya/kaya_factors.py @@ -2,7 +2,7 @@ from pyam.kaya import input_variable_names, kaya_factor_names, kaya_variable_names -def kaya_factors(kaya_variables_frame): +def compute_kaya_factors(kaya_variables_frame): kaya_factors = pyam.concat( [ _calc_gnp_per_p(kaya_variables_frame), diff --git a/pyam/kaya/kaya_variables.py b/pyam/kaya/kaya_variables.py index 370db6f0d..4b4869719 100644 --- a/pyam/kaya/kaya_variables.py +++ b/pyam/kaya/kaya_variables.py @@ -13,7 +13,7 @@ ] -def kaya_variables(input_data): +def compute_kaya_variables(input_data): if _is_input_data_incomplete(input_data): return None diff --git a/tests/test_feature_kaya_factors.py b/tests/test_feature_kaya_factors.py index fa0a14616..5baebd882 100644 --- a/tests/test_feature_kaya_factors.py +++ b/tests/test_feature_kaya_factors.py @@ -113,3 +113,35 @@ def test_calling_kaya_factors_multiple_times(): obs.compute.kaya_factors(append=True) obs.compute.kaya_factors(append=True) assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) + +def test_kaya_factors_uses_gdp_mer_fallback(): + """Test that kaya_factors uses GDP_MER when GDP_PPP is not available""" + # Create test data without GDP_PPP + df_no_gdp_ppp = TEST_DF.filter(variable="GDP|PPP", keep=False) + + # Create expected result using GDP|MER instead of GDP|PPP for calculations + exp_no_gdp_ppp = IamDataFrame( + pd.DataFrame( + [ # 8 EJ / 5 billion USD = 1.6 + ["FE/GNP", "EJ / USD / billion", 1.6], + # 5 billion USD / 1000 million = 0.005 + ["GNP/P", "USD * billion / million / a", 0.005], + ["NFC/TFC", "", 0.833333], + ["PEDEq/FE", "", 1.250000], + ["PEFF/PEDEq", "", 0.900000], + ["TFC/PEFF", "Mt CO2/EJ", 1.333333], + ["Population", "million", 1000], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", + ) + + # Compute kaya factors + obs = df_no_gdp_ppp.compute.kaya_factors() + + # Verify results match expected + 
assert_iamframe_equal(exp_no_gdp_ppp, obs)