From cb5008472debb2a4dc0e75a0dbffa16b34e9b61a Mon Sep 17 00:00:00 2001 From: Jeremy Jacobson <85139244+jjacobson95@users.noreply.github.com> Date: Fri, 22 Mar 2024 09:54:04 -0700 Subject: [PATCH 1/3] Update msgapfill.py --- modelseedpy/core/msgapfill.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 5c79fb9c..79200d6e 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -11,6 +11,7 @@ from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.exceptions import GapfillingError +from collections import defaultdict logger = logging.getLogger(__name__) logger.setLevel( @@ -447,12 +448,21 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont): p = np.zeros(len(restructured_anoot["Reactions"])) # computed_weights is the rxn_hash ({rxn: weight, ...}) computed_weights = {} + + # Precompute gene reaction lookups + gene_reaction_lookup = {} + for idx, row in restructured_anoot.iterrows(): + gene = row['Gene'] + reaction = row['Reactions'] + if gene in gene_reaction_lookup: + gene_reaction_lookup[gene].append(reaction) + else: + gene_reaction_lookup[gene] = [reaction] + for rxn in range(0, len(restructured_anoot)): substr_rxns = [rxn for rxn in restructured_anoot["Reactions"][[rxn]]] # Get the indices of the rows where the condition is True - mask = restructured_anoot["Reactions"].apply( - lambda x: any(substr in x for substr in substr_rxns) - ) + mask = restructured_anoot["Reactions"] == substr_rxns[0] idx_gene = mask[mask].index nAG = 0 nMG = 0 @@ -476,11 +486,10 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont): selected_gene = restructured_anoot["Gene"].iloc[idx_gene[iGene]] # Finding reactions associated with genes that contain the selected gene - associated_reactions = restructured_anoot["Reactions"][ - restructured_anoot["Gene"].str.contains(selected_gene) - ] + associated_reactions = gene_reaction_lookup.get(selected_gene, []) + # Checking if there are more than one unique reactions - if len(associated_reactions.unique()) > 1: + if len(associated_reactions) > 1: nCG += 1 p[rxn] = (nMG / nAG) * (1 / (1 + (nCG / nAG))) From fc1019ba1703f0547809de3764624fee95d7e00b Mon Sep 17 00:00:00 2001 From: Jeremy Date: Fri, 22 Mar 2024 09:55:18 -0700 Subject: [PATCH 2/3] linted --- modelseedpy/core/msgapfill.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 79200d6e..4e94b069 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -452,13 +452,13 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont): # Precompute gene reaction lookups gene_reaction_lookup = {} for idx, row in restructured_anoot.iterrows(): - gene = row['Gene'] - reaction = row['Reactions'] + gene = row["Gene"] + reaction = row["Reactions"] if gene in gene_reaction_lookup: gene_reaction_lookup[gene].append(reaction) else: gene_reaction_lookup[gene] = [reaction] - + for rxn in range(0, len(restructured_anoot)): substr_rxns = [rxn for rxn in restructured_anoot["Reactions"][[rxn]]] # Get the indices of the rows where the condition is True @@ -487,7 +487,7 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont): # Finding reactions associated with genes that contain the selected gene associated_reactions = gene_reaction_lookup.get(selected_gene, []) - + # Checking if there are more than one unique reactions if len(associated_reactions) > 1: nCG += 1 From 6738874a02b583e66a83c12e45b4607ed95afa86 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Fri, 22 Mar 2024 10:17:26 -0700 Subject: [PATCH 3/3] re-linted msgapfill.py --- modelseedpy/core/msgapfill.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 4e94b069..ee1b0fe3 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -13,6 +13,7 @@ from modelseedpy.core.exceptions import GapfillingError from collections import defaultdict + logger = logging.getLogger(__name__) logger.setLevel( logging.INFO # WARNING @@ -131,9 +132,9 @@ def test_gapfill_database(self, media, target=None, before_filtering=True): if before_filtering: filter_msg = " before filtering " note = "FBF" - gf_sensitivity[media.id][target][note] = ( - self.mdlutl.find_unproducible_biomass_compounds(target) - ) + gf_sensitivity[media.id][target][ + note + ] = self.mdlutl.find_unproducible_biomass_compounds(target) if target != "rxn00062_c0": self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") logger.warning( @@ -388,10 +389,10 @@ def integrate_gapfill_solution( gf_sensitivity[solution["media"].id] = {} if solution["target"] not in gf_sensitivity[solution["media"].id]: gf_sensitivity[solution["media"].id][solution["target"]] = {} - gf_sensitivity[solution["media"].id][solution["target"]]["success"] = ( - self.mdlutl.find_unproducible_biomass_compounds( - solution["target"], cumulative_solution - ) + gf_sensitivity[solution["media"].id][solution["target"]][ + "success" + ] = self.mdlutl.find_unproducible_biomass_compounds( + solution["target"], cumulative_solution ) self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") self.cumulative_gapfilling.extend(cumulative_solution)