updates

iAMB-RWTH-Aachen · May 16, 2024 · 134b24e · 134b24e
1 parent eda64b5
commit 134b24e
Show file tree

Hide file tree

Showing 5 changed files with 206 additions and 87 deletions.
diff --git a/Scripts/ecoli_tam_incl_transcript_info.py b/Scripts/ecoli_tam_incl_transcript_info.py
@@ -1,5 +1,6 @@
 import matplotlib.pyplot as plt
 import pandas as pd
+import numpy as np
 import os
 import cobra
 
@@ -68,6 +69,24 @@ def get_transcript_data(transcript_file_path:str = TRANSCRIPT_FILE_PATH, mmol =
     else:
         return expression_data_normalized
 
+def get_differentially_expressed_transcript_data(strain):
+    transcript_data = get_transcript_data()
+    transcript_data_diff = transcript_data.assign(logfc = lambda x: np.log10(x[strain]/x['REF']))
+    # transcript_data_diff = transcript_data_diff.sort_values('logfc', ascending=False)
+    # plot histogram
+    bins = 100
+
+    # plt.hist(transcript_data_diff.logfc, bins=bins)
+    # plt.xlabel('logfc')
+    # plt.ylabel('number of genes')
+    # plt.show()
+    transcript_data_diff = transcript_data_diff[transcript_data_diff.logfc.abs()>0.05]
+    # print(transcript_data_diff)
+    # print(len(transcript_data_diff))
+
+    return transcript_data_diff
+
+
 def get_flux_data(flux_file_path:str = FLUX_FILE_PATH,
                         reference: str = 'Holm et al'):
     expression_data = pd.read_excel(flux_file_path, sheet_name=reference, index_col=0)
@@ -79,21 +98,22 @@ def get_flux_data(flux_file_path:str = FLUX_FILE_PATH,
 def get_pam_fluxes(substrate_uptake_rate, strain ='REF'):
     pam = set_up_ecoli_pam()
     pam.change_reaction_bounds('EX_glc__D_e', lower_bound=-substrate_uptake_rate, upper_bound=0)
-    if strain == 'NOX':
-        add_nox_protein_to_model(pam)
+    # if strain == 'NOX':
+    #     add_nox_protein_to_model(pam)
     sol = pam.optimize()
     pam_fluxes = sol.fluxes
     return pam_fluxes,pam
 
 def set_up_tamodel(substrate_uptake_rate, strain ='REF'):
     tam = set_up_ecoli_tam()
     tam.change_reaction_bounds('EX_glc__D_e', lower_bound=-substrate_uptake_rate, upper_bound=0)
-    if strain == 'NOX':
-        add_nox_protein_to_model(tam)
-        add_nox_transcript_to_model(tam)
-        tam.transcripts.get_by_id('mRNA_nox').change_concentration(1e-3, 1e-5)
+    # if strain == 'NOX':
+    #     add_nox_protein_to_model(tam)
+    #     add_nox_transcript_to_model(tam)
+    #     tam.transcripts.get_by_id('mRNA_nox').change_concentration(1e-3, 1e-5)
 
-    transcript_data_mmol = get_transcript_data()
+    # transcript_data_mmol = get_transcript_data()
+    transcript_data_mmol = get_differentially_expressed_transcript_data(strain)
     for gene, expression_data in transcript_data_mmol.iterrows():
         transcript_id = 'mRNA_' + gene
         if not transcript_id in tam.transcripts: continue
@@ -219,7 +239,7 @@ def plot_flux_comparison(flux_df_abs, flux_df_rel, strain):
 if __name__ == '__main__':
 
     print('Reference condition')
-    compare_fluxes_holm_reference(strain = 'REF', plot =True)
+    compare_fluxes_holm_reference(strain = 'NOX', plot =True)
     print('\n-------------------------------------------------------------------------------------------------')
     # print('mutation 1: NOX strain (overexpression of NADH oxidase)\n')
     # compare_fluxes_holm_reference('NOX', plot=False)

diff --git a/Scripts/ecolicore_tam_incl_transcript_info.py b/Scripts/ecolicore_tam_incl_transcript_info.py
@@ -192,7 +192,7 @@ def plot_flux_comparison(flux_df_abs, flux_df_rel, strain):
 if __name__ == '__main__':
 
     print('Reference condition')
-    compare_fluxes_holm_reference(strain = 'NOX', plot =False)
+    compare_fluxes_holm_reference(strain = 'REF', plot =False)
     print('\n-------------------------------------------------------------------------------------------------')
     # print('mutation 1: NOX strain (overexpression of NADH oxidase)\n')
     # compare_fluxes_holm_reference('NOX', plot=False)

diff --git a/src/PAModelpy/PAModel.py b/src/PAModelpy/PAModel.py
@@ -840,19 +840,16 @@ def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b):
 
         Definition: constraint_UB*shadowprice/obj_value.
 
-        :param obj_value: Float
-        :param mu: DataFrame
-            Shadowprices for all constraints
-        :param mu_ub: DataFrame
-            Shadowprices for the reaction UB constraints
-        :param mu_lb: DataFrame
-            Shadowprices for the reaction LB constraints
-        :param mu_ec_f: DataFrame
-            Shadowprices for the constraint related to an enzymatic catalysis of the forward reaction
-        :param mu_ec_b: DataFrame
-            Shadowprices for the constraint related to an enzymatic catalysis of the backward reaction
-
         Results will be saved in the self.capacity_sensitivity_coefficients attribute as a dataframe
+
+        Args:
+            obj_value: Float: optimal objective value, commonly maximal growth rate under specific conditions
+            mu: DataFrame: shadowprices for all constraints
+            mu_ub: DataFrame: Shadowprices for the reaction UB constraints
+            mu_lb: DataFrame: Shadowprices for the reaction LB constraints
+            mu_ec_f: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the forward reaction
+            mu_ec_b: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the backward reaction
+
         """
         self.capacity_sensitivity_coefficients = pd.DataFrame(columns=['rxn_id', 'enzyme_id', 'constraint', 'coefficient'])
         # add capacity sensitivity coefficients for sectors if they are there
@@ -897,24 +894,55 @@ def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b):
             self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_row_LB
 
         for enzyme in self.enzymes:
-            # get reactions associated with this enzyme
-            reactions = ','.join(self.get_reactions_with_enzyme_id(enzyme.id))
-            # get the right row from the shadow price dataframes
-            # min constraints can be skipped as they always equal to 0 (the enzymes cannot have a concentration lower than 0)
-            mu_ec_max_row = mu_ec_f[mu_ec_f['index'] == f'{enzyme.id}_max']
-            mu_ec_min_row = mu_ec_b[mu_ec_b['index'] == f'{enzyme.id}_min']
-
-            # max Enzyme constraint
-            ca_coefficient_EC_max= self.constraints[f'{enzyme.id}_max'].ub * mu_ec_max_row['shadow_prices'].iloc[0] / obj_value
-            new_enzyme_row_EC_max =[reactions, enzyme.id, 'enzyme_max', ca_coefficient_EC_max]
-            # add new_row to dataframe
-            self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_enzyme_row_EC_max
+            self.calculate_enzyme_csc(enzyme, mu_ec_f, mu_ec_b, obj_value)
 
-            # min Enzyme constraint
-            ca_coefficient_EC_min = self.constraints[f'{enzyme.id}_min'].ub * mu_ec_min_row['shadow_prices'].iloc[0] / obj_value
-            new_enzyme_row_EC_min =[reactions, enzyme.id, 'enzyme_min', ca_coefficient_EC_min]
-            # add new_row to dataframe
-            self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_enzyme_row_EC_min
+    def calculate_csc_for_molecule(self, molecule: Union[Enzyme],
+                                   mu_min:pd.DataFrame, mu_max:pd.DataFrame, obj_value:float,
+                                   constraint_type:str, associated_reactions:str):
+        """
+        Calculate the capacity sensitivity coefficients (CSCs) for constraints related to a biomolecule,
+        such as enzymes. These coefficients reflect the effect of infitesmal changes in the constraint bounds
+        on the objective function.
+
+        The coefficients and associated reactions will be saved in the capacity_sensitivity_coefficients dataframe.
+
+        Args:
+           enzyme:Enzyme: enzyme object to calculate CSC for
+           mu_min: DataFrame: Shadowprices for the constraint related to a lower bound/minimum
+           mu_max: DataFrame: Shadowprices for the constraint related to an upper bound/maximum
+           obj_value: float: optimal objective value, commonly maximal growth rate under specific conditions
+        """
+        # get the right row from the shadow price dataframes
+        mu_max_row = mu_max[mu_max['index'] == f'{molecule.id}_max']
+        mu_min_row = mu_min[mu_min['index'] == f'{molecule.id}_min']
+
+        # Calculate sensitivity coefficients for maximum constraint
+        ca_coefficient_max = self.constraints[f'{molecule.id}_max'].ub * mu_max_row['shadow_prices'].iloc[0] / obj_value
+        new_row_max = [associated_reactions, molecule.id, f'{constraint_type}_max', ca_coefficient_max]
+        self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_row_max
+
+        # Calculate sensitivity coefficients for minimum constraint
+        ca_coefficient_min = self.constraints[f'{molecule.id}_min'].ub * mu_min_row['shadow_prices'].iloc[0] / obj_value
+        new_row_min = [associated_reactions, molecule.id, f'{constraint_type}_min', ca_coefficient_min]
+        self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_row_min
+
+
+    def calculate_enzyme_csc(self, enzyme:Enzyme, mu_ec_f:pd.DataFrame, mu_ec_b:pd.DataFrame, obj_value:float):
+        """
+        Calculate the capacity sensitivity coefficients (CSCs) for constraints related to enzyme. These coefficients
+        reflect the effect of infitesmal changes in the constraint bounds on the objective function. The coefficients
+        and associated reactions will be saved in the capacity_sensitivity_coefficients dataframe.
+
+        The function makes use of the abstracted function calculate_csc_for_molecule
+
+        Args:
+            enzyme:Enzyme: enzyme object to calculate CSC for
+            mu_ec_f: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the forward reaction
+            mu_ec_b: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the backward reaction
+            obj_value: float: optimal objective value, commonly maximal growth rate under specific conditions
+        """
+        reactions = ','.join(self.get_reactions_with_enzyme_id(enzyme.id))
+        self.calculate_csc_for_molecule(enzyme, mu_ec_b, mu_ec_f, obj_value, 'enzyme', reactions)
 
     def calculate_esc(self, obj_value, mu_ec_f, mu_ec_b):
         """

diff --git a/src/TAModelpy/TAModel.py b/src/TAModelpy/TAModel.py
@@ -3,6 +3,7 @@
 
 import cobra
 import numpy as np
+import pandas as pd
 from cobra import Object, DictList, Gene
 from optlang.symbolics import Zero
 import re
@@ -265,39 +266,6 @@ def add_transcript_enzyme_relations(self, enzyme: Union[Enzyme, EnzymeComplex])
         for relation, transcripts in transcript_associated_with_enzyme.items():
             self.make_mrna_max_constraint(enzyme, transcripts)
 
-    def set_transcript_enzyme_min_relation(self, transcript: Transcript):
-        transcripts = [transcript]
-        if len(transcript._lumped_transcripts) > 0:
-            transcripts = transcript._lumped_transcripts
-        for transcript in transcripts:
-            if f'{transcript.id}_min' not in self.constraints.keys():
-                min_constraint = self.problem.Constraint(Zero, name=f'{transcript.id}_min', lb=-1e6, ub=0)
-
-                self.add_cons_vars([min_constraint])
-
-                self.constraints[f'{transcript.id}_min'].set_linear_coefficients({
-                    transcript.mrna_variable: transcript.f_min
-                })
-
-            for enz in transcript.enzymes:
-                # make separate constraints for forward and reverse enzyme variables
-                fwd_var = self.enzyme_variables.get_by_id(enz.id).forward_variable
-                rev_var = self.enzyme_variables.get_by_id(enz.id).reverse_variable
-
-                if f'{transcript.id}_min' not in self.constraints.keys():
-                    min_constraint = self.problem.Constraint(Zero, name=f'{transcript.id}_min', lb=-1e6, ub=0)
-                    self.add_cons_vars([min_constraint])
-
-                self.constraints[f'{transcript.id}_min'].set_linear_coefficients({
-                    fwd_var: -1,
-                    rev_var: -1
-                })
-
-            # save the constraints in the transcript object
-            transcript._constraints = {**transcript._constraints,
-                                       **{f'{transcript.id}_min': self.constraints[f'{transcript.id}_min']
-                                          }}
-
 
     def add_total_mrna_constraint(self, mrna_sector:ActivemRNASector) -> None:
         """Adds a constraint to limit the total mRNA abundance.
@@ -359,6 +327,39 @@ def _check_if_gene_in_enzyme_genes(self, gene_id: str,
                     return True
         return False
 
+    def set_transcript_enzyme_min_relation(self, transcript: Transcript):
+        transcripts = [transcript]
+        if len(transcript._lumped_transcripts) > 0:
+            transcripts = transcript._lumped_transcripts
+        for transcript in transcripts:
+            if f'{transcript.id}_min' not in self.constraints.keys():
+                min_constraint = self.problem.Constraint(Zero, name=f'{transcript.id}_min', lb=-1e6, ub=0)
+
+                self.add_cons_vars([min_constraint])
+
+                self.constraints[f'{transcript.id}_min'].set_linear_coefficients({
+                    transcript.mrna_variable: transcript.f_min
+                })
+
+            for enz in transcript.enzymes:
+                # make separate constraints for forward and reverse enzyme variables
+                fwd_var = self.enzyme_variables.get_by_id(enz.id).forward_variable
+                rev_var = self.enzyme_variables.get_by_id(enz.id).reverse_variable
+
+                if f'{transcript.id}_min' not in self.constraints.keys():
+                    min_constraint = self.problem.Constraint(Zero, name=f'{transcript.id}_min', lb=-1e6, ub=0)
+                    self.add_cons_vars([min_constraint])
+
+                self.constraints[f'{transcript.id}_min'].set_linear_coefficients({
+                    fwd_var: -1,
+                    rev_var: -1
+                })
+
+            # save the constraints in the transcript object
+            transcript._constraints = {**transcript._constraints,
+                                       **{f'{transcript.id}_min': self.constraints[f'{transcript.id}_min']
+                                          }}
+
     def make_mrna_max_constraint(self, enz: Enzyme,
                                  transcripts:Union[Transcript, list]) -> Union[Transcript, list]:
         """
@@ -630,31 +631,40 @@ def _gene_is_in_enzyme_complex(self, gene: Union[str, Gene], enzyme: Enzyme) ->
         else:
             return False
 
-    def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b):
+    def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b) -> None:
         """
         Calculates the capacity sensitivity coefficient for all inequality constraints in the model.
         The sum of all capacity sensitivity coefficients should equal 1 for growth maximization.
 
         Definition: constraint_UB*shadowprice/obj_value.
 
-        Inherits from the PAModel calculate_csc function and adds the calculation of the csc for the total rna
-        constraint.
-
-        :param obj_value: Float
-        :param mu: DataFrame
-            Shadowprices for all constraints
-        :param mu_ub: DataFrame
-            Shadowprices for the reaction UB constraints
-        :param mu_lb: DataFrame
-            Shadowprices for the reaction LB constraints
-        :param mu_ec_f: DataFrame
-            Shadowprices for the constraint related to an enzymatic catalysis of the forward reaction
-        :param mu_ec_b: DataFrame
-            Shadowprices for the constraint related to an enzymatic catalysis of the backward reaction
+        Inherits from the PAModel calculate_csc function and adds the calculation of the csc for the total mrna
+        and individual transcript-enzyme constraint
+
+        Args:
+            obj_value: Float: optimal objective value, commonly maximal growth rate under specific conditions
+            mu: DataFrame: shadowprices for all constraints
+            mu_ub: DataFrame: Shadowprices for the reaction UB constraints
+            mu_lb: DataFrame: Shadowprices for the reaction LB constraints
+            mu_ec_f: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the forward
+                reaction.This also includes the maximal transcript constraints (representing the maximal number of
+                proteins which can be translated from a single mRNA)
+            mu_ec_b: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the backward
+                reaction.This also includes the minimal transcript constraints (representing the minimal number of
+                proteins which can be translated from a single mRNA).
 
         Results will be saved in the self.capacity_sensitivity_coefficients attribute as a dataframe
         """
+        #split enzyme and mrna constraints
+        mu_mrna_max = mu_ec_f[mu_ec_f['rxn_id'].str.contains('mRNA')]
+        mu_ec_f = mu_ec_f[~mu_ec_f['rxn_id'].str.contains('mRNA')]
+
+        mu_mrna_min = mu_ec_b[mu_ec_b['rxn_id'].str.contains('mRNA')]
+        mu_ec_b = mu_ec_b[~mu_ec_b['rxn_id'].str.contains('mRNA')]
+
         super().calculate_csc(obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b)
+
+        #TAModel specific constraints
         if self.TOTAL_MRNA_CONSTRAINT_ID in self.constraints.keys():
             constraint = 'mRNA'
             rxn_id = self.TOTAL_MRNA_CONSTRAINT_ID
@@ -665,5 +675,30 @@ def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b):
             new_row = [rxn_id, enzyme_id, constraint, ca_coefficient]
             #  add new_row to dataframe
             self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_row
+        for transcript in self.transcripts:
+            self.calculate_mrna_csc(transcript, mu_mrna_min, mu_mrna_max, obj_value)
+
+    def calculate_mrna_csc(self, transcript:Transcript,
+                           mu_mrna_min:pd.DataFrame, mu_mrna_max:pd.DataFrame,
+                           obj_value:float) -> None:
+        """
+        Calculate the capacity sensitivity coefficients (CSCs) for constraints related to transcripts. These coefficients
+        reflect the effect of infitesmal changes in the constraint bounds on the objective function. The coefficients
+        and associated reactions will be saved in the capacity_sensitivity_coefficients dataframe.
+
+        The function makes use of the abstracted function calculate_csc_for_molecule in the PAModel object
+
+        Args:
+            transcript:Transcript: transcript object to calculate CSC for
+            mu_mrna_min: pd.DataFrame: Shadowprices for the constraint related to the maximal number of
+                proteins which can be translated from a single mRNA.
+            mu_mrna_max: pd.DataFrame: Shadowprices for the constraint related to the minimal number of
+                proteins which can be translated from a single mRNA.
+            obj_value: float: optimal objective value, commonly maximal growth rate under specific conditions
+        """
+        # only calculate csc if the transcript is actually associated with a constraint
+        if not transcript.id in mu_mrna_max.rxn_id: return
 
-        #TODO sensitivity analysis for transcripts
+        reactions = ','.join(
+            [','.join(self.get_reactions_with_enzyme_id(enzyme.id)) for enzyme in transcript.enzymes])
+        self.calculate_csc_for_molecule(transcript, mu_mrna_min, mu_mrna_max, obj_value, 'transcript', reactions)