Skip to content

Commit

Permalink
Merge pull request #65 from brightway-lca/test_patchwork
Browse files Browse the repository at this point in the history
Add option to build lci directly from timeline and performance improvements
  • Loading branch information
TimoDiepers authored Jul 11, 2024
2 parents 0139dc5 + 18b0771 commit 7f5b197
Show file tree
Hide file tree
Showing 6 changed files with 2,583 additions and 218 deletions.
3 changes: 2 additions & 1 deletion bw_timex/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .dynamic_biosphere_builder import DynamicBiosphereBuilder
from .dynamic_characterization import DynamicCharacterization
from .dynamic_biosphere_builder import DynamicBiosphereBuilder
from .edge_extractor import EdgeExtractor
from .matrix_modifier import MatrixModifier
from .timeline_builder import TimelineBuilder
from .timex_lca import TimexLCA
from .helper_classes import SetList

__version__ = "0.1.5"
255 changes: 157 additions & 98 deletions bw_timex/dynamic_biosphere_builder.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from scipy import sparse as sp
import pandas as pd
import numpy as np
import bw2data as bd
from bw_temporalis import TemporalDistribution
from .remapping import TimeMappingDict
import numpy as np
import pandas as pd
from bw2calc import LCA
from datetime import datetime
from bw_temporalis import TemporalDistribution
from scipy import sparse as sp

from .helper_classes import SetList
from .utils import convert_date_string_to_datetime


class DynamicBiosphereBuilder:
"""
This class is used to build a dynamic biosphere matrix, which in contrast to the normal biosphere matrix has rows for each biosphere flow at their time of emission.
This class is used to build a dynamic biosphere matrix, which in contrast to the normal biosphere matrix has rows for each biosphere flow at their time of emission.
Thus, the dimensions are (bio_flows at a specific timestep) x (processes).
"""

Expand All @@ -25,8 +25,11 @@ def __init__(
temporal_grouping: str,
database_date_dict: dict,
database_date_dict_static_only: dict,
timeline: pd.DataFrame,
interdatabase_activity_mapping: SetList,
from_timeline: bool = False,
) -> None:
"""
"""
Initializes the DynamicBiosphereBuilder object.
Parameters
Expand All @@ -47,7 +50,13 @@ def __init__(
A dictionary mapping database names to their respective date
database_date_dict_static_only : dict
A dictionary mapping database names to their respective date, but only containing static databases, which are the background databases.
timeline: pd.DataFrame
The edge timeline, created from TimexLCA.build_timeline()
interdatabase_activity_mapping : SetList
A list of sets, where each set contains the activity ids of the same activity in different databases
from_timeline : bool, optional
A boolean indicating if the dynamic biosphere matrix is built directly from the timeline. Default is False.
Returns
-------
None
Expand All @@ -62,8 +71,14 @@ def __init__(
}

self.lca_obj = lca_obj
self.technosphere_matrix = lca_obj.technosphere_matrix
self.activity_dict = lca_obj.dicts.activity
if not from_timeline:
self.technosphere_matrix = (
lca_obj.technosphere_matrix.tocsc()
) # convert to csc as this is only used for column slicing
self.dynamic_supply_array = lca_obj.supply_array
self.activity_dict = lca_obj.dicts.activity
else:
self.dynamic_supply_array = timeline.amount.values.astype(float)
self.activity_time_mapping_dict = activity_time_mapping_dict
self.biosphere_time_mapping_dict = biosphere_time_mapping_dict
self.demand_timing_dict = demand_timing_dict
Expand All @@ -72,133 +87,177 @@ def __init__(
self.temporal_grouping = temporal_grouping
self.database_date_dict = database_date_dict
self.database_date_dict_static_only = database_date_dict_static_only
self.dynamic_supply_array = lca_obj.supply_array
self.timeline = timeline
self.interdatabase_activity_mapping = interdatabase_activity_mapping
self.rows = []
self.cols = []
self.values = []

def build_dynamic_biosphere_matrix(self):
def build_dynamic_biosphere_matrix(
self,
from_timeline: bool = False,
):
"""
This function creates a separate biosphere matrix, with the dimenions (bio_flows at a specific timestep) x (processes).
Every temporally resolved biosphere flow has its own row in the matrix, making it highly sparse.
The timing of the emitting process and potential additional temporal information of the bioshpere flow (e.g. delay of emission compared to timing of process) are considered.
Parameters
----------
None
from_timeline
Returns
-------
dynamic_biomatrix : scipy.sparse.csr_matrix
A sparse matrix with the dimensions (bio_flows at a specific timestep) x (processes), where every row represents a biosphere flow at a specific time.
"""

for id in self.node_id_collection_dict["temporalized_processes"]:
process_col_index = self.activity_dict[id] # get the matrix column index
for row in self.timeline.itertuples():
idx = row.time_mapped_producer
if from_timeline:
process_col_index = row.Index
else:
process_col_index = self.activity_dict[
idx
] # get the matrix column index

(
(original_db, original_code),
time,
) = self.activity_time_mapping_dict.reversed()[ # time is here an integer, with various length depending on temporal grouping, e.g. [Y] -> 2024, [M] - > 202401
id
idx
]

time_in_datetime = convert_date_string_to_datetime(
self.temporal_grouping, str(time)
) # now time is a datetime

td_producer = TemporalDistribution(
date=np.array([time_in_datetime], dtype=self.time_res),
amount=np.array([1]),
).date
date = td_producer[0]

act = bd.get_node(database=original_db, code=original_code)

for exc in act.biosphere():
if exc.get("temporal_distribution"):
td_dates = exc["temporal_distribution"].date # time_delta
td_values = exc["temporal_distribution"].amount
dates = (
td_producer + td_dates
) # we can add a datetime of length 1 to a timedelta of length N without problems
values = exc["amount"] * td_values

else: # exchange has no TD
dates = td_producer # datetime array, same time as producer
values = [exc["amount"]]

# Add entries to dynamic bio matrix
for date, amount in zip(dates, values):

# first create a row index for the tuple((db, bioflow), date))
time_mapped_matrix_id = self.biosphere_time_mapping_dict.add(
(exc.input, date)
)

# populate lists with which sparse matrix is constructed
self.add_matrix_entry_for_biosphere_flows(
row=time_mapped_matrix_id,
col=process_col_index,
amount=amount,
)

for id in self.node_id_collection_dict["temporal_markets"]:
process_col_index = self.activity_dict[id] # get the matrix column index
technosphere_column = (
self.technosphere_matrix[:, process_col_index].toarray().flatten()
) # 1-d np.array
demand = dict()
for idx, amount in enumerate(technosphere_column):
if idx == self.activity_dict[id]: # Skip production exchange
continue
if amount == 0:
continue

node_id = self.activity_dict.reversed[idx]

if (
node_id in self.node_id_collection_dict["foreground_node_ids"]
): # We only aggregate background process bioflows
continue

# demand[bd.get_node(id=node_id)] = -amount
demand[node_id] = -amount

self.lca_obj.redo_lci(demand)
aggregated_inventory = self.lca_obj.inventory.sum(
axis=1
) # aggregated biosphere flows of background supply chain emissions. Rows are bioflows.

for idx, amount in enumerate(aggregated_inventory.flatten().tolist()[0]):
bioflow = bd.get_activity(self.lca_obj.dicts.biosphere.reversed[idx])
((_, _), time) = self.activity_time_mapping_dict.reversed()[id]
if idx in self.node_id_collection_dict["temporalized_processes"]:

time_in_datetime = convert_date_string_to_datetime(
self.temporal_grouping, str(time)
) # now time is a datetime

td_producer = TemporalDistribution(
date=np.array([str(time_in_datetime)], dtype=self.time_res),
date=np.array([time_in_datetime], dtype=self.time_res),
amount=np.array([1]),
).date # TODO: Simplify
).date
date = td_producer[0]

time_mapped_matrix_id = self.biosphere_time_mapping_dict.add(
(bioflow, date)
)
act = bd.get_node(database=original_db, code=original_code)

for exc in act.biosphere():
if exc.get("temporal_distribution"):
td_dates = exc["temporal_distribution"].date # time_delta
td_values = exc["temporal_distribution"].amount
dates = (
td_producer + td_dates
) # we can add a datetime of length 1 to a timedelta of length N without problems
values = exc["amount"] * td_values

else: # exchange has no TD
dates = td_producer # datetime array, same time as producer
values = [exc["amount"]]

# Add entries to dynamic bio matrix
for date, amount in zip(dates, values):

# first create a row index for the tuple((db, bioflow), date))
time_mapped_matrix_id = self.biosphere_time_mapping_dict.add(
(exc.input.id, date)
)

# populate lists with which sparse matrix is constructed
self.add_matrix_entry_for_biosphere_flows(
row=time_mapped_matrix_id,
col=process_col_index,
amount=amount,
)
elif idx in self.node_id_collection_dict["temporal_markets"]:
if from_timeline:
demand = self.demand_from_timeline(row, original_db)
else:
demand = self.demand_from_technosphere(idx)

self.lca_obj.redo_lci(demand)

aggregated_inventory = self.lca_obj.inventory.sum(
axis=1
) # aggregated biosphere flows of background supply chain emissions. Rows are bioflows.

for row_idx, amount in enumerate(aggregated_inventory.A1):
bioflow = self.lca_obj.dicts.biosphere.reversed[row_idx]
((_, _), time) = self.activity_time_mapping_dict.reversed()[idx]

time_in_datetime = convert_date_string_to_datetime(
self.temporal_grouping, str(time)
) # now time is a datetime

td_producer = TemporalDistribution(
date=np.array([str(time_in_datetime)], dtype=self.time_res),
amount=np.array([1]),
).date # TODO: Simplify
date = td_producer[0]

self.add_matrix_entry_for_biosphere_flows(
row=time_mapped_matrix_id, col=process_col_index, amount=amount
)
time_mapped_matrix_id = self.biosphere_time_mapping_dict.add(
(bioflow, date)
)

self.add_matrix_entry_for_biosphere_flows(
row=time_mapped_matrix_id, col=process_col_index, amount=amount
)

# now build the dynamic biosphere matrix
shape = (max(self.rows) + 1, len(self.activity_time_mapping_dict))
if from_timeline:
ncols = len(self.timeline)
else:
ncols = len(self.activity_time_mapping_dict)
shape = (max(self.rows) + 1, ncols)
dynamic_biomatrix = sp.coo_matrix((self.values, (self.rows, self.cols)), shape)
self.dynamic_biomatrix = dynamic_biomatrix.tocsr()

return self.dynamic_biomatrix

def demand_from_timeline(self, row, original_db):
    """
    Build a demand dictionary straight from one timeline row.

    The row's ``interpolation_weights`` map database names to amounts; for
    each database we look up the id of the same activity in that database
    via ``self.interdatabase_activity_mapping`` and assign it the weight.

    Parameters
    ----------
    row : namedtuple
        One row of the edge timeline (needs ``interpolation_weights`` and
        ``producer`` attributes).
    original_db : str
        Name of the database the producer originally lives in.

    Returns
    -------
    dict
        Mapping of time-mapped activity id -> demanded amount.
    """
    demand = {}
    # candidates: all (activity_id, database_name) variants of this producer
    candidates = self.interdatabase_activity_mapping[(row.producer, original_db)]
    for database, share in row.interpolation_weights.items():
        # single-element unpacking: exactly one candidate must match the db
        [(mapped_activity_id, _)] = [
            pair for pair in candidates if pair[1] == database
        ]
        demand[mapped_activity_id] = share
    return demand

def demand_from_technosphere(self, idx):
    """
    Build a demand dictionary from the process's technosphere column.

    Slices the column of ``idx`` out of the technosphere matrix and turns
    every non-zero input (sign flipped, production exchange excluded) into a
    demand entry, skipping foreground nodes because only background
    supply-chain bioflows are aggregated.

    Parameters
    ----------
    idx : int
        Activity id of the process whose inputs are collected.

    Returns
    -------
    dict
        Mapping of node id -> demanded amount (negated matrix value).
    """
    production_row = self.activity_dict[idx]  # matrix column index of the process
    column = self.technosphere_matrix[:, production_row].toarray().ravel()  # 1-d np.array
    demand = {}
    for matrix_row, value in enumerate(column):
        # skip the production exchange itself and empty entries
        if matrix_row == production_row or value == 0:
            continue
        node_id = self.activity_dict.reversed[matrix_row]
        # only background processes contribute to the aggregated bioflows
        if node_id in self.node_id_collection_dict["foreground_node_ids"]:
            continue
        demand[node_id] = -value  # inputs are negative in the technosphere matrix
    return demand

def add_matrix_entry_for_biosphere_flows(self, row, col, amount):
"""
Adds an entry to the lists of row, col and values, which are then used to construct the dynamic biosphere matrix.
Expand Down
Loading

0 comments on commit 7f5b197

Please sign in to comment.