Skip to content

Commit

Permalink
continue notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinstadler committed Jan 29, 2025
1 parent dd4b983 commit 8a6ede1
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 72 deletions.
52 changes: 42 additions & 10 deletions doc/source/notebooks/GLAM_EXIO_link.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,17 @@
from pathlib import Path
import pymrio

import warnings
import pandas as pd
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)


# %% [markdown]
# Next, we specify where the data should be stored


# %%
DATA_ROOT = Path("/tmp/glam_exio_tutorial") # set this to your data directory
# TODO: Fix back
# DATA_ROOT = Path("/tmp/glam_exio_tutorial") # set this to your data directory
DATA_ROOT = Path("/home/konstans/tmp/glam_exio_tutorial") # set this to your data directory

EXIOBASE_STORAGE_FOLDER = DATA_ROOT / "exiobase"
GLAM_STORAGE_FOLDER = DATA_ROOT / "glam"
Expand Down Expand Up @@ -91,6 +91,13 @@
# %%
GLAM_char = pymrio.GLAMprocessing.prep_GLAM(GLAM_data=GLAM_raw)

# TODO: remove later, just for fast testing
GLAM_char_archive = GLAM_char.copy()

# TODO: remove later
# take 10000 random samples:
GLAM_char = GLAM_char_archive.sample(10000)

# %% [markdown]
# This results in a long table with all characterization factors from GLAM.
# We can then later use this table to characterize EXIOBASE flows after renaming to GLAM flow names.
Expand Down Expand Up @@ -159,13 +166,37 @@
# We are now ready to convert these stressors to GLAM flows. To do so we use the convert function of Pymrio.
# This function can be used for many more things and is [explained in detail in the notebook here](./convert.ipynb)

# TODO: remove later, just a fast way to save and load for pymrio development

EXIO3_TMP = Path(EXIOBASE_STORAGE_FOLDER / "TMP_2018")
EXIO3_TMP.mkdir(parents=True, exist_ok=True)
exio3.save_all(EXIO3_TMP, table_format="parquet")

import pymrio
import pyinstrument

exio3 = pymrio.load_all(EXIO3_TMP)
exio3.reset_all_full()

# %%
debug_bridge = exio_glam_bridge

with pyinstrument.Profiler() as p:
debug_sat = exio3.satellite.convert(
debug_bridge, new_extension_name="GLAM flows",
unit_column_orig="EXIOBASE_unit",
unit_column_new="FLOW_unit",
ignore_columns=["comment"]
)
debug_sat.F


exio3.glam_flows = exio3.satellite.convert(
exio_glam_bridge, new_extension_name="GLAM flows",
unit_column_orig="EXIOBASE_unit",
unit_column_new="FLOW_unit",
ignore_columns=["comment"]
)
exio_glam_bridge, new_extension_name="GLAM flows",
unit_column_orig="EXIOBASE_unit",
unit_column_new="FLOW_unit",
ignore_columns=["comment"]
)

# %% [markdown]
# This now gives us a new satellite account "glam_flows".
Expand All @@ -175,7 +206,6 @@

# %% [markdown]
# With flow names corresponding to GLAM flows.
# Since we already had the consumption-based accounts calculated in EXIOBASE before, we can immediately see the same for the GLAM flows.

# %%
exio3.glam_flows.D_cba
Expand Down Expand Up @@ -206,8 +236,10 @@

GLAM_char = GLAM_char.loc[GLAM_char.LCIAMethod_name__FLOW_uuid == "EQ Land use"]


# TODO: fix region error - use GLAM_char only with land use for that
# %%
# NOTE(review): when debugging, only one country (200 columns) is present, not the full dataset.
# The result must be as long as the full dataset, filled with 0 otherwise.
exio3.glam_characterized = exio3.glam_flows.convert(
GLAM_char, new_extension_name="GLAM characterized"
)
Expand Down
106 changes: 44 additions & 62 deletions pymrio/tools/ioutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,9 @@ def check_df_map(df_orig, df_map):
# would be in effect given df_orig.
pass

import line_profiler

@line_profiler.profile
def convert(
df_orig, df_map, agg_func="sum", drop_not_bridged_index=True, ignore_columns=None
):
Expand Down Expand Up @@ -1176,74 +1178,54 @@ def convert(
# and renames by the new one (bridge.new)

already_renamed = dict()

for bridge in bridges:

# encountering a bridge with the same orig name but which should
# lead to two new index levels
if bridge.orig in already_renamed.keys():
# duplicate the index level
_index_order = list(df_collected.index.names)
df_collected.reset_index(
level=already_renamed[bridge.orig].new, inplace=True
)
df_collected[bridge.new] = df_cur_map.index.get_level_values(
bridge.raw
)[0]
if (len(df_collected.index.names) == 1) and (
df_collected.index.names[0] is None
):
df_collected.set_index(
already_renamed[bridge.orig].new,
drop=True,
append=False,
inplace=True,
)
else:
df_collected.set_index(
already_renamed[bridge.orig].new,
drop=True,
append=True,
inplace=True,
)
df_collected.set_index(bridge.new, drop=True, append=True, inplace=True)
df_collected.index = df_collected.index.reorder_levels(
_index_order + [bridge.new]
)
# already renamed the index to another one previously,
# but we need to create more index levels for the
# same original index level
new_index_value = df_cur_map.index.get_level_values(bridge.raw)[0]
_old_index = df_collected.index.to_frame()
# as we go along in order, we add them to the end of the index
_old_index.insert(len(_old_index.columns), bridge.new, new_index_value)
df_collected.index = pd.MultiIndex.from_frame(_old_index)

continue

for idx_old_names in df_collected.index.names:
if bridge.orig in idx_old_names:
# rename the index names
if isinstance(df_collected.index, pd.MultiIndex):
df_collected.index = df_collected.index.set_names(
bridge.new, level=idx_old_names
)
else:
df_collected.index = df_collected.index.set_names(
bridge.new, level=None
)

# rename the actual index values
df_collected.reset_index(level=bridge.new, inplace=True)
for row in df_cur_map.reset_index().iterrows():
new_row_name = row[1][bridge.raw]
old_row_name = row[1][bridge.orig]
df_collected.loc[:, bridge.new] = df_collected.loc[
:, bridge.new
].str.replace(pat=old_row_name, repl=new_row_name, regex=True)

# put the index back
if df_collected.index.name is None:
# The case with a single index where the previous reset index
# left only a numerical index
df_collected.set_index(
bridge.new, drop=True, append=False, inplace=True
)
else:
df_collected.set_index(
bridge.new, drop=True, append=True, inplace=True
)
already_renamed[bridge.orig] = bridge
else:

for idx_old_names in df_collected.index.names:
if bridge.orig in idx_old_names:
# rename the index names
if isinstance(df_collected.index, pd.MultiIndex):
df_collected.index = df_collected.index.set_names(
bridge.new, level=idx_old_names
)
else:
df_collected.index = df_collected.index.set_names(
bridge.new, level=None
)

# rename the actual index values
df_collected = df_collected.reset_index(level=bridge.new)
for row in df_cur_map.reset_index().iterrows():
new_row_name = row[1][bridge.raw]
old_row_name = row[1][bridge.orig]
df_collected.loc[:, bridge.new] = df_collected.loc[
:, bridge.new
].str.replace(pat=old_row_name, repl=new_row_name, regex=True)

# put the index back
if df_collected.index.name is None:
# The case with a single index where the previous reset index
# left only a numerical index
df_collected = df_collected.set_index(bridge.new, drop=True, append=False)
else:
df_collected = df_collected.set_index(bridge.new, drop=True, append=True)

already_renamed[bridge.orig] = bridge

res_collector.append(
df_collected.groupby(by=df_collected.index.names).agg(agg_func)
Expand Down

0 comments on commit 8a6ede1

Please sign in to comment.