Skip to content

Commit

Permalink
Merge pull request #65 from brightway-lca/test_patchwork
Browse files Browse the repository at this point in the history
Add option to build lci directly from timeline and performance improvements
  • Loading branch information
TimoDiepers authored Jul 11, 2024
2 parents 0139dc5 + 18b0771 commit 7f5b197
Show file tree
Hide file tree
Showing 6 changed files with 2,583 additions and 218 deletions.
3 changes: 2 additions & 1 deletion bw_timex/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .dynamic_biosphere_builder import DynamicBiosphereBuilder
from .dynamic_characterization import DynamicCharacterization
from .dynamic_biosphere_builder import DynamicBiosphereBuilder
from .edge_extractor import EdgeExtractor
from .matrix_modifier import MatrixModifier
from .timeline_builder import TimelineBuilder
from .timex_lca import TimexLCA
from .helper_classes import SetList

__version__ = "0.1.5"
255 changes: 157 additions & 98 deletions bw_timex/dynamic_biosphere_builder.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from scipy import sparse as sp
import pandas as pd
import numpy as np
import bw2data as bd
from bw_temporalis import TemporalDistribution
from .remapping import TimeMappingDict
import numpy as np
import pandas as pd
from bw2calc import LCA
from datetime import datetime
from bw_temporalis import TemporalDistribution
from scipy import sparse as sp

from .helper_classes import SetList
from .utils import convert_date_string_to_datetime


class DynamicBiosphereBuilder:
"""
This class is used to build a dynamic biosphere matrix, which in contrast to the normal biosphere matrix has rows for each biosphere flow at their time of emission.
This class is used to build a dynamic biosphere matrix, which in contrast to the normal biosphere matrix has rows for each biosphere flow at their time of emission.
Thus, the dimensions are (bio_flows at a specific timestep) x (processes).
"""

Expand All @@ -25,8 +25,11 @@ def __init__(
temporal_grouping: str,
database_date_dict: dict,
database_date_dict_static_only: dict,
timeline: pd.DataFrame,
interdatabase_activity_mapping: SetList,
from_timeline: bool = False,
) -> None:
"""
"""
Initializes the DynamicBiosphereBuilder object.
Parameters
Expand All @@ -47,7 +50,13 @@ def __init__(
A dictionary mapping database names to their respective date
database_date_dict_static_only : dict
A dictionary mapping database names to their respective date, but only containing static databases, which are the background databases.
timeline: pd.DataFrame
The edge timeline, created from TimexLCA.build_timeline()
interdatabase_activity_mapping : SetList
A list of sets, where each set contains the activity ids of the same activity in different databases
from_timeline : bool, optional
A boolean indicating if the dynamic biosphere matrix is built directly from the timeline. Default is False.
Returns
-------
None
Expand All @@ -62,8 +71,14 @@ def __init__(
}

self.lca_obj = lca_obj
self.technosphere_matrix = lca_obj.technosphere_matrix
self.activity_dict = lca_obj.dicts.activity
if not from_timeline:
self.technosphere_matrix = (
lca_obj.technosphere_matrix.tocsc()
) # convert to csc as this is only used for column slicing
self.dynamic_supply_array = lca_obj.supply_array
self.activity_dict = lca_obj.dicts.activity
else:
self.dynamic_supply_array = timeline.amount.values.astype(float)
self.activity_time_mapping_dict = activity_time_mapping_dict
self.biosphere_time_mapping_dict = biosphere_time_mapping_dict
self.demand_timing_dict = demand_timing_dict
Expand All @@ -72,133 +87,177 @@ def __init__(
self.temporal_grouping = temporal_grouping
self.database_date_dict = database_date_dict
self.database_date_dict_static_only = database_date_dict_static_only
self.dynamic_supply_array = lca_obj.supply_array
self.timeline = timeline
self.interdatabase_activity_mapping = interdatabase_activity_mapping
self.rows = []
self.cols = []
self.values = []

def build_dynamic_biosphere_matrix(self):
def build_dynamic_biosphere_matrix(
self,
from_timeline: bool = False,
):
"""
This function creates a separate biosphere matrix, with the dimenions (bio_flows at a specific timestep) x (processes).
Every temporally resolved biosphere flow has its own row in the matrix, making it highly sparse.
The timing of the emitting process and potential additional temporal information of the bioshpere flow (e.g. delay of emission compared to timing of process) are considered.
Parameters
----------
None
from_timeline
Returns
-------
dynamic_biomatrix : scipy.sparse.csr_matrix
A sparse matrix with the dimensions (bio_flows at a specific timestep) x (processes), where every row represents a biosphere flow at a specific time.
"""

for id in self.node_id_collection_dict["temporalized_processes"]:
process_col_index = self.activity_dict[id] # get the matrix column index
for row in self.timeline.itertuples():
idx = row.time_mapped_producer
if from_timeline:
process_col_index = row.Index
else:
process_col_index = self.activity_dict[
idx
] # get the matrix column index

(
(original_db, original_code),
time,
) = self.activity_time_mapping_dict.reversed()[ # time is here an integer, with various length depending on temporal grouping, e.g. [Y] -> 2024, [M] - > 202401
id
idx
]

time_in_datetime = convert_date_string_to_datetime(
self.temporal_grouping, str(time)
) # now time is a datetime

td_producer = TemporalDistribution(
date=np.array([time_in_datetime], dtype=self.time_res),
amount=np.array([1]),
).date
date = td_producer[0]

act = bd.get_node(database=original_db, code=original_code)

for exc in act.biosphere():
if exc.get("temporal_distribution"):
td_dates = exc["temporal_distribution"].date # time_delta
td_values = exc["temporal_distribution"].amount
dates = (
td_producer + td_dates
) # we can add a datetime of length 1 to a timedelta of length N without problems
values = exc["amount"] * td_values

else: # exchange has no TD
dates = td_producer # datetime array, same time as producer
values = [exc["amount"]]

# Add entries to dynamic bio matrix
for date, amount in zip(dates, values):

# first create a row index for the tuple((db, bioflow), date))
time_mapped_matrix_id = self.biosphere_time_mapping_dict.add(
(exc.input, date)
)

# populate lists with which sparse matrix is constructed
self.add_matrix_entry_for_biosphere_flows(
row=time_mapped_matrix_id,
col=process_col_index,
amount=amount,
)

for id in self.node_id_collection_dict["temporal_markets"]:
process_col_index = self.activity_dict[id] # get the matrix column index
technosphere_column = (
self.technosphere_matrix[:, process_col_index].toarray().flatten()
) # 1-d np.array
demand = dict()
for idx, amount in enumerate(technosphere_column):
if idx == self.activity_dict[id]: # Skip production exchange
continue
if amount == 0:
continue

node_id = self.activity_dict.reversed[idx]

if (
node_id in self.node_id_collection_dict["foreground_node_ids"]
): # We only aggregate background process bioflows
continue

# demand[bd.get_node(id=node_id)] = -amount
demand[node_id] = -amount

self.lca_obj.redo_lci(demand)
aggregated_inventory = self.lca_obj.inventory.sum(
axis=1
) # aggregated biosphere flows of background supply chain emissions. Rows are bioflows.

for idx, amount in enumerate(aggregated_inventory.flatten().tolist()[0]):
bioflow = bd.get_activity(self.lca_obj.dicts.biosphere.reversed[idx])
((_, _), time) = self.activity_time_mapping_dict.reversed()[id]
if idx in self.node_id_collection_dict["temporalized_processes"]:

time_in_datetime = convert_date_string_to_datetime(
self.temporal_grouping, str(time)
) # now time is a datetime

td_producer = TemporalDistribution(
date=np.array([str(time_in_datetime)], dtype=self.time_res),
date=np.array([time_in_datetime], dtype=self.time_res),
amount=np.array([1]),
).date # TODO: Simplify
).date
date = td_producer[0]

time_mapped_matrix_id = self.biosphere_time_mapping_dict.add(
(bioflow, date)
)
act = bd.get_node(database=original_db, code=original_code)

for exc in act.biosphere():
if exc.get("temporal_distribution"):
td_dates = exc["temporal_distribution"].date # time_delta
td_values = exc["temporal_distribution"].amount
dates = (
td_producer + td_dates
) # we can add a datetime of length 1 to a timedelta of length N without problems
values = exc["amount"] * td_values

else: # exchange has no TD
dates = td_producer # datetime array, same time as producer
values = [exc["amount"]]

# Add entries to dynamic bio matrix
for date, amount in zip(dates, values):

# first create a row index for the tuple((db, bioflow), date))
time_mapped_matrix_id = self.biosphere_time_mapping_dict.add(
(exc.input.id, date)
)

# populate lists with which sparse matrix is constructed
self.add_matrix_entry_for_biosphere_flows(
row=time_mapped_matrix_id,
col=process_col_index,
amount=amount,
)
elif idx in self.node_id_collection_dict["temporal_markets"]:
if from_timeline:
demand = self.demand_from_timeline(row, original_db)
else:
demand = self.demand_from_technosphere(idx)

self.lca_obj.redo_lci(demand)

aggregated_inventory = self.lca_obj.inventory.sum(
axis=1
) # aggregated biosphere flows of background supply chain emissions. Rows are bioflows.

for row_idx, amount in enumerate(aggregated_inventory.A1):
bioflow = self.lca_obj.dicts.biosphere.reversed[row_idx]
((_, _), time) = self.activity_time_mapping_dict.reversed()[idx]

time_in_datetime = convert_date_string_to_datetime(
self.temporal_grouping, str(time)
) # now time is a datetime

td_producer = TemporalDistribution(
date=np.array([str(time_in_datetime)], dtype=self.time_res),
amount=np.array([1]),
).date # TODO: Simplify
date = td_producer[0]

self.add_matrix_entry_for_biosphere_flows(
row=time_mapped_matrix_id, col=process_col_index, amount=amount
)
time_mapped_matrix_id = self.biosphere_time_mapping_dict.add(
(bioflow, date)
)

self.add_matrix_entry_for_biosphere_flows(
row=time_mapped_matrix_id, col=process_col_index, amount=amount
)

# now build the dynamic biosphere matrix
shape = (max(self.rows) + 1, len(self.activity_time_mapping_dict))
if from_timeline:
ncols = len(self.timeline)
else:
ncols = len(self.activity_time_mapping_dict)
shape = (max(self.rows) + 1, ncols)
dynamic_biomatrix = sp.coo_matrix((self.values, (self.rows, self.cols)), shape)
self.dynamic_biomatrix = dynamic_biomatrix.tocsr()

return self.dynamic_biomatrix

def demand_from_timeline(self, row, original_db):
    """
    Build a demand dictionary straight from one timeline row.

    The row's ``interpolation_weights`` map database names to amounts; for
    each database we look up the id of the same activity in that database
    via ``self.interdatabase_activity_mapping`` and assign it the weight.

    Parameters
    ----------
    row : namedtuple
        One row of the edge timeline (needs ``interpolation_weights`` and
        ``producer`` attributes).
    original_db : str
        Name of the database the producer originally lives in.

    Returns
    -------
    dict
        Mapping of time-mapped activity id -> demanded amount.
    """
    demand = {}
    # candidates: all (activity_id, database_name) variants of this producer
    candidates = self.interdatabase_activity_mapping[(row.producer, original_db)]
    for database, share in row.interpolation_weights.items():
        # single-element unpacking: exactly one candidate must match the db
        [(mapped_activity_id, _)] = [
            pair for pair in candidates if pair[1] == database
        ]
        demand[mapped_activity_id] = share
    return demand

def demand_from_technosphere(self, idx):
    """
    Build a demand dictionary from the process's technosphere column.

    Slices the column of ``idx`` out of the technosphere matrix and turns
    every non-zero input (sign flipped, production exchange excluded) into a
    demand entry, skipping foreground nodes because only background
    supply-chain bioflows are aggregated.

    Parameters
    ----------
    idx : int
        Activity id of the process whose inputs are collected.

    Returns
    -------
    dict
        Mapping of node id -> demanded amount (negated matrix value).
    """
    production_row = self.activity_dict[idx]  # matrix column index of the process
    column = self.technosphere_matrix[:, production_row].toarray().ravel()  # 1-d np.array
    demand = {}
    for matrix_row, value in enumerate(column):
        # skip the production exchange itself and empty entries
        if matrix_row == production_row or value == 0:
            continue
        node_id = self.activity_dict.reversed[matrix_row]
        # only background processes contribute to the aggregated bioflows
        if node_id in self.node_id_collection_dict["foreground_node_ids"]:
            continue
        demand[node_id] = -value  # inputs are negative in the technosphere matrix
    return demand

def add_matrix_entry_for_biosphere_flows(self, row, col, amount):
"""
Adds an entry to the lists of row, col and values, which are then used to construct the dynamic biosphere matrix.
Expand Down
Loading

0 comments on commit 7f5b197

Please sign in to comment.