From cb5008472debb2a4dc0e75a0dbffa16b34e9b61a Mon Sep 17 00:00:00 2001
From: Jeremy Jacobson <85139244+jjacobson95@users.noreply.github.com>
Date: Fri, 22 Mar 2024 09:54:04 -0700
Subject: [PATCH 1/3] Precompute gene-to-reaction lookup in msgapfill.py

---
 modelseedpy/core/msgapfill.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py
index 5c79fb9c..79200d6e 100644
--- a/modelseedpy/core/msgapfill.py
+++ b/modelseedpy/core/msgapfill.py
@@ -11,6 +11,7 @@
 from modelseedpy.fbapkg.mspackagemanager import MSPackageManager
 from modelseedpy.core.msmodelutl import MSModelUtil
 from modelseedpy.core.exceptions import GapfillingError
+from collections import defaultdict
 
 logger = logging.getLogger(__name__)
 logger.setLevel(
@@ -447,12 +448,21 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont):
         p = np.zeros(len(restructured_anoot["Reactions"]))
         # computed_weights is the rxn_hash ({rxn: weight, ...})
         computed_weights = {}
+
+        # Precompute gene reaction lookups
+        gene_reaction_lookup = {}
+        for idx, row in restructured_anoot.iterrows():
+            gene = row['Gene']
+            reaction = row['Reactions']
+            if gene in gene_reaction_lookup:
+                gene_reaction_lookup[gene].append(reaction)
+            else:
+                gene_reaction_lookup[gene] = [reaction]
+        
         for rxn in range(0, len(restructured_anoot)):
             substr_rxns = [rxn for rxn in restructured_anoot["Reactions"][[rxn]]]
             # Get the indices of the rows where the condition is True
-            mask = restructured_anoot["Reactions"].apply(
-                lambda x: any(substr in x for substr in substr_rxns)
-            )
+            mask = restructured_anoot["Reactions"] == substr_rxns[0]
             idx_gene = mask[mask].index
             nAG = 0
             nMG = 0
@@ -476,11 +486,10 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont):
                     selected_gene = restructured_anoot["Gene"].iloc[idx_gene[iGene]]
 
                     # Finding reactions associated with genes that contain the selected gene
-                    associated_reactions = restructured_anoot["Reactions"][
-                        restructured_anoot["Gene"].str.contains(selected_gene)
-                    ]
+                    associated_reactions = gene_reaction_lookup.get(selected_gene, [])
+                    
                     # Checking if there are more than one unique reactions
-                    if len(associated_reactions.unique()) > 1:
+                    if len(associated_reactions) > 1:
                         nCG += 1
 
                 p[rxn] = (nMG / nAG) * (1 / (1 + (nCG / nAG)))

From fc1019ba1703f0547809de3764624fee95d7e00b Mon Sep 17 00:00:00 2001
From: Jeremy <jeremy.jacobson3402@gmail.com>
Date: Fri, 22 Mar 2024 09:55:18 -0700
Subject: [PATCH 2/3] Lint msgapfill.py: normalize quotes, strip trailing whitespace

---
 modelseedpy/core/msgapfill.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py
index 79200d6e..4e94b069 100644
--- a/modelseedpy/core/msgapfill.py
+++ b/modelseedpy/core/msgapfill.py
@@ -452,13 +452,13 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont):
         # Precompute gene reaction lookups
         gene_reaction_lookup = {}
         for idx, row in restructured_anoot.iterrows():
-            gene = row['Gene']
-            reaction = row['Reactions']
+            gene = row["Gene"]
+            reaction = row["Reactions"]
             if gene in gene_reaction_lookup:
                 gene_reaction_lookup[gene].append(reaction)
             else:
                 gene_reaction_lookup[gene] = [reaction]
-        
+
         for rxn in range(0, len(restructured_anoot)):
             substr_rxns = [rxn for rxn in restructured_anoot["Reactions"][[rxn]]]
             # Get the indices of the rows where the condition is True
@@ -487,7 +487,7 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont):
 
                     # Finding reactions associated with genes that contain the selected gene
                     associated_reactions = gene_reaction_lookup.get(selected_gene, [])
-                    
+
                     # Checking if there are more than one unique reactions
                     if len(associated_reactions) > 1:
                         nCG += 1

From 6738874a02b583e66a83c12e45b4607ed95afa86 Mon Sep 17 00:00:00 2001
From: Jeremy <jeremy.jacobson3402@gmail.com>
Date: Fri, 22 Mar 2024 10:17:26 -0700
Subject: [PATCH 3/3] re-linted msgapfill.py

---
 modelseedpy/core/msgapfill.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py
index 4e94b069..ee1b0fe3 100644
--- a/modelseedpy/core/msgapfill.py
+++ b/modelseedpy/core/msgapfill.py
@@ -13,6 +13,7 @@
 from modelseedpy.core.exceptions import GapfillingError
 from collections import defaultdict
 
+
 logger = logging.getLogger(__name__)
 logger.setLevel(
     logging.INFO  # WARNING
@@ -131,9 +132,9 @@ def test_gapfill_database(self, media, target=None, before_filtering=True):
         if before_filtering:
             filter_msg = " before filtering "
             note = "FBF"
-        gf_sensitivity[media.id][target][note] = (
-            self.mdlutl.find_unproducible_biomass_compounds(target)
-        )
+        gf_sensitivity[media.id][target][
+            note
+        ] = self.mdlutl.find_unproducible_biomass_compounds(target)
         if target != "rxn00062_c0":
             self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity")
         logger.warning(
@@ -388,10 +389,10 @@ def integrate_gapfill_solution(
                 gf_sensitivity[solution["media"].id] = {}
             if solution["target"] not in gf_sensitivity[solution["media"].id]:
                 gf_sensitivity[solution["media"].id][solution["target"]] = {}
-            gf_sensitivity[solution["media"].id][solution["target"]]["success"] = (
-                self.mdlutl.find_unproducible_biomass_compounds(
-                    solution["target"], cumulative_solution
-                )
+            gf_sensitivity[solution["media"].id][solution["target"]][
+                "success"
+            ] = self.mdlutl.find_unproducible_biomass_compounds(
+                solution["target"], cumulative_solution
             )
             self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity")
         self.cumulative_gapfilling.extend(cumulative_solution)