Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
SamiralVdB committed May 16, 2024
1 parent eda64b5 commit 134b24e
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 87 deletions.
36 changes: 28 additions & 8 deletions Scripts/ecoli_tam_incl_transcript_info.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import cobra

Expand Down Expand Up @@ -68,6 +69,24 @@ def get_transcript_data(transcript_file_path:str = TRANSCRIPT_FILE_PATH, mmol =
else:
return expression_data_normalized

def get_differentially_expressed_transcript_data(strain):
transcript_data = get_transcript_data()
transcript_data_diff = transcript_data.assign(logfc = lambda x: np.log10(x[strain]/x['REF']))
# transcript_data_diff = transcript_data_diff.sort_values('logfc', ascending=False)
# plot histogram
bins = 100

# plt.hist(transcript_data_diff.logfc, bins=bins)
# plt.xlabel('logfc')
# plt.ylabel('number of genes')
# plt.show()
transcript_data_diff = transcript_data_diff[transcript_data_diff.logfc.abs()>0.05]
# print(transcript_data_diff)
# print(len(transcript_data_diff))

return transcript_data_diff


def get_flux_data(flux_file_path:str = FLUX_FILE_PATH,
reference: str = 'Holm et al'):
expression_data = pd.read_excel(flux_file_path, sheet_name=reference, index_col=0)
Expand All @@ -79,21 +98,22 @@ def get_flux_data(flux_file_path:str = FLUX_FILE_PATH,
def get_pam_fluxes(substrate_uptake_rate, strain ='REF'):
pam = set_up_ecoli_pam()
pam.change_reaction_bounds('EX_glc__D_e', lower_bound=-substrate_uptake_rate, upper_bound=0)
if strain == 'NOX':
add_nox_protein_to_model(pam)
# if strain == 'NOX':
# add_nox_protein_to_model(pam)
sol = pam.optimize()
pam_fluxes = sol.fluxes
return pam_fluxes,pam

def set_up_tamodel(substrate_uptake_rate, strain ='REF'):
tam = set_up_ecoli_tam()
tam.change_reaction_bounds('EX_glc__D_e', lower_bound=-substrate_uptake_rate, upper_bound=0)
if strain == 'NOX':
add_nox_protein_to_model(tam)
add_nox_transcript_to_model(tam)
tam.transcripts.get_by_id('mRNA_nox').change_concentration(1e-3, 1e-5)
# if strain == 'NOX':
# add_nox_protein_to_model(tam)
# add_nox_transcript_to_model(tam)
# tam.transcripts.get_by_id('mRNA_nox').change_concentration(1e-3, 1e-5)

transcript_data_mmol = get_transcript_data()
# transcript_data_mmol = get_transcript_data()
transcript_data_mmol = get_differentially_expressed_transcript_data(strain)
for gene, expression_data in transcript_data_mmol.iterrows():
transcript_id = 'mRNA_' + gene
if not transcript_id in tam.transcripts: continue
Expand Down Expand Up @@ -219,7 +239,7 @@ def plot_flux_comparison(flux_df_abs, flux_df_rel, strain):
if __name__ == '__main__':

print('Reference condition')
compare_fluxes_holm_reference(strain = 'REF', plot =True)
compare_fluxes_holm_reference(strain = 'NOX', plot =True)
print('\n-------------------------------------------------------------------------------------------------')
# print('mutation 1: NOX strain (overexpression of NADH oxidase)\n')
# compare_fluxes_holm_reference('NOX', plot=False)
Expand Down
2 changes: 1 addition & 1 deletion Scripts/ecolicore_tam_incl_transcript_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def plot_flux_comparison(flux_df_abs, flux_df_rel, strain):
if __name__ == '__main__':

print('Reference condition')
compare_fluxes_holm_reference(strain = 'NOX', plot =False)
compare_fluxes_holm_reference(strain = 'REF', plot =False)
print('\n-------------------------------------------------------------------------------------------------')
# print('mutation 1: NOX strain (overexpression of NADH oxidase)\n')
# compare_fluxes_holm_reference('NOX', plot=False)
Expand Down
86 changes: 57 additions & 29 deletions src/PAModelpy/PAModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,19 +840,16 @@ def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b):
Definition: constraint_UB*shadowprice/obj_value.
:param obj_value: Float
:param mu: DataFrame
Shadowprices for all constraints
:param mu_ub: DataFrame
Shadowprices for the reaction UB constraints
:param mu_lb: DataFrame
Shadowprices for the reaction LB constraints
:param mu_ec_f: DataFrame
Shadowprices for the constraint related to an enzymatic catalysis of the forward reaction
:param mu_ec_b: DataFrame
Shadowprices for the constraint related to an enzymatic catalysis of the backward reaction
Results will be saved in the self.capacity_sensitivity_coefficients attribute as a dataframe
Args:
obj_value: Float: optimal objective value, commonly maximal growth rate under specific conditions
mu: DataFrame: shadowprices for all constraints
mu_ub: DataFrame: Shadowprices for the reaction UB constraints
mu_lb: DataFrame: Shadowprices for the reaction LB constraints
mu_ec_f: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the forward reaction
mu_ec_b: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the backward reaction
"""
self.capacity_sensitivity_coefficients = pd.DataFrame(columns=['rxn_id', 'enzyme_id', 'constraint', 'coefficient'])
# add capacity sensitivity coefficients for sectors if they are there
Expand Down Expand Up @@ -897,24 +894,55 @@ def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b):
self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_row_LB

for enzyme in self.enzymes:
# get reactions associated with this enzyme
reactions = ','.join(self.get_reactions_with_enzyme_id(enzyme.id))
# get the right row from the shadow price dataframes
# min constraints can be skipped as they always equal to 0 (the enzymes cannot have a concentration lower than 0)
mu_ec_max_row = mu_ec_f[mu_ec_f['index'] == f'{enzyme.id}_max']
mu_ec_min_row = mu_ec_b[mu_ec_b['index'] == f'{enzyme.id}_min']

# max Enzyme constraint
ca_coefficient_EC_max= self.constraints[f'{enzyme.id}_max'].ub * mu_ec_max_row['shadow_prices'].iloc[0] / obj_value
new_enzyme_row_EC_max =[reactions, enzyme.id, 'enzyme_max', ca_coefficient_EC_max]
# add new_row to dataframe
self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_enzyme_row_EC_max
self.calculate_enzyme_csc(enzyme, mu_ec_f, mu_ec_b, obj_value)

# min Enzyme constraint
ca_coefficient_EC_min = self.constraints[f'{enzyme.id}_min'].ub * mu_ec_min_row['shadow_prices'].iloc[0] / obj_value
new_enzyme_row_EC_min =[reactions, enzyme.id, 'enzyme_min', ca_coefficient_EC_min]
# add new_row to dataframe
self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_enzyme_row_EC_min
def calculate_csc_for_molecule(self, molecule: Union[Enzyme],
mu_min:pd.DataFrame, mu_max:pd.DataFrame, obj_value:float,
constraint_type:str, associated_reactions:str):
"""
Calculate the capacity sensitivity coefficients (CSCs) for constraints related to a biomolecule,
such as enzymes. These coefficients reflect the effect of infitesmal changes in the constraint bounds
on the objective function.
The coefficients and associated reactions will be saved in the capacity_sensitivity_coefficients dataframe.
Args:
enzyme:Enzyme: enzyme object to calculate CSC for
mu_min: DataFrame: Shadowprices for the constraint related to a lower bound/minimum
mu_max: DataFrame: Shadowprices for the constraint related to an upper bound/maximum
obj_value: float: optimal objective value, commonly maximal growth rate under specific conditions
"""
# get the right row from the shadow price dataframes
mu_max_row = mu_max[mu_max['index'] == f'{molecule.id}_max']
mu_min_row = mu_min[mu_min['index'] == f'{molecule.id}_min']

# Calculate sensitivity coefficients for maximum constraint
ca_coefficient_max = self.constraints[f'{molecule.id}_max'].ub * mu_max_row['shadow_prices'].iloc[0] / obj_value
new_row_max = [associated_reactions, molecule.id, f'{constraint_type}_max', ca_coefficient_max]
self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_row_max

# Calculate sensitivity coefficients for minimum constraint
ca_coefficient_min = self.constraints[f'{molecule.id}_min'].ub * mu_min_row['shadow_prices'].iloc[0] / obj_value
new_row_min = [associated_reactions, molecule.id, f'{constraint_type}_min', ca_coefficient_min]
self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_row_min


def calculate_enzyme_csc(self, enzyme:Enzyme, mu_ec_f:pd.DataFrame, mu_ec_b:pd.DataFrame, obj_value:float):
"""
Calculate the capacity sensitivity coefficients (CSCs) for constraints related to enzyme. These coefficients
reflect the effect of infitesmal changes in the constraint bounds on the objective function. The coefficients
and associated reactions will be saved in the capacity_sensitivity_coefficients dataframe.
The function makes use of the abstracted function calculate_csc_for_molecule
Args:
enzyme:Enzyme: enzyme object to calculate CSC for
mu_ec_f: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the forward reaction
mu_ec_b: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the backward reaction
obj_value: float: optimal objective value, commonly maximal growth rate under specific conditions
"""
reactions = ','.join(self.get_reactions_with_enzyme_id(enzyme.id))
self.calculate_csc_for_molecule(enzyme, mu_ec_b, mu_ec_f, obj_value, 'enzyme', reactions)

def calculate_esc(self, obj_value, mu_ec_f, mu_ec_b):
"""
Expand Down
133 changes: 84 additions & 49 deletions src/TAModelpy/TAModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import cobra
import numpy as np
import pandas as pd
from cobra import Object, DictList, Gene
from optlang.symbolics import Zero
import re
Expand Down Expand Up @@ -265,39 +266,6 @@ def add_transcript_enzyme_relations(self, enzyme: Union[Enzyme, EnzymeComplex])
for relation, transcripts in transcript_associated_with_enzyme.items():
self.make_mrna_max_constraint(enzyme, transcripts)

def set_transcript_enzyme_min_relation(self, transcript: Transcript):
transcripts = [transcript]
if len(transcript._lumped_transcripts) > 0:
transcripts = transcript._lumped_transcripts
for transcript in transcripts:
if f'{transcript.id}_min' not in self.constraints.keys():
min_constraint = self.problem.Constraint(Zero, name=f'{transcript.id}_min', lb=-1e6, ub=0)

self.add_cons_vars([min_constraint])

self.constraints[f'{transcript.id}_min'].set_linear_coefficients({
transcript.mrna_variable: transcript.f_min
})

for enz in transcript.enzymes:
# make separate constraints for forward and reverse enzyme variables
fwd_var = self.enzyme_variables.get_by_id(enz.id).forward_variable
rev_var = self.enzyme_variables.get_by_id(enz.id).reverse_variable

if f'{transcript.id}_min' not in self.constraints.keys():
min_constraint = self.problem.Constraint(Zero, name=f'{transcript.id}_min', lb=-1e6, ub=0)
self.add_cons_vars([min_constraint])

self.constraints[f'{transcript.id}_min'].set_linear_coefficients({
fwd_var: -1,
rev_var: -1
})

# save the constraints in the transcript object
transcript._constraints = {**transcript._constraints,
**{f'{transcript.id}_min': self.constraints[f'{transcript.id}_min']
}}


def add_total_mrna_constraint(self, mrna_sector:ActivemRNASector) -> None:
"""Adds a constraint to limit the total mRNA abundance.
Expand Down Expand Up @@ -359,6 +327,39 @@ def _check_if_gene_in_enzyme_genes(self, gene_id: str,
return True
return False

def set_transcript_enzyme_min_relation(self, transcript: Transcript):
transcripts = [transcript]
if len(transcript._lumped_transcripts) > 0:
transcripts = transcript._lumped_transcripts
for transcript in transcripts:
if f'{transcript.id}_min' not in self.constraints.keys():
min_constraint = self.problem.Constraint(Zero, name=f'{transcript.id}_min', lb=-1e6, ub=0)

self.add_cons_vars([min_constraint])

self.constraints[f'{transcript.id}_min'].set_linear_coefficients({
transcript.mrna_variable: transcript.f_min
})

for enz in transcript.enzymes:
# make separate constraints for forward and reverse enzyme variables
fwd_var = self.enzyme_variables.get_by_id(enz.id).forward_variable
rev_var = self.enzyme_variables.get_by_id(enz.id).reverse_variable

if f'{transcript.id}_min' not in self.constraints.keys():
min_constraint = self.problem.Constraint(Zero, name=f'{transcript.id}_min', lb=-1e6, ub=0)
self.add_cons_vars([min_constraint])

self.constraints[f'{transcript.id}_min'].set_linear_coefficients({
fwd_var: -1,
rev_var: -1
})

# save the constraints in the transcript object
transcript._constraints = {**transcript._constraints,
**{f'{transcript.id}_min': self.constraints[f'{transcript.id}_min']
}}

def make_mrna_max_constraint(self, enz: Enzyme,
transcripts:Union[Transcript, list]) -> Union[Transcript, list]:
"""
Expand Down Expand Up @@ -630,31 +631,40 @@ def _gene_is_in_enzyme_complex(self, gene: Union[str, Gene], enzyme: Enzyme) ->
else:
return False

def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b):
def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b) -> None:
"""
Calculates the capacity sensitivity coefficient for all inequality constraints in the model.
The sum of all capacity sensitivity coefficients should equal 1 for growth maximization.
Definition: constraint_UB*shadowprice/obj_value.
Inherits from the PAModel calculate_csc function and adds the calculation of the csc for the total rna
constraint.
:param obj_value: Float
:param mu: DataFrame
Shadowprices for all constraints
:param mu_ub: DataFrame
Shadowprices for the reaction UB constraints
:param mu_lb: DataFrame
Shadowprices for the reaction LB constraints
:param mu_ec_f: DataFrame
Shadowprices for the constraint related to an enzymatic catalysis of the forward reaction
:param mu_ec_b: DataFrame
Shadowprices for the constraint related to an enzymatic catalysis of the backward reaction
Inherits from the PAModel calculate_csc function and adds the calculation of the csc for the total mrna
and individual transcript-enzyme constraint
Args:
obj_value: Float: optimal objective value, commonly maximal growth rate under specific conditions
mu: DataFrame: shadowprices for all constraints
mu_ub: DataFrame: Shadowprices for the reaction UB constraints
mu_lb: DataFrame: Shadowprices for the reaction LB constraints
mu_ec_f: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the forward
reaction.This also includes the maximal transcript constraints (representing the maximal number of
proteins which can be translated from a single mRNA)
mu_ec_b: DataFrame: Shadowprices for the constraint related to an enzymatic catalysis of the backward
reaction.This also includes the minimal transcript constraints (representing the minimal number of
proteins which can be translated from a single mRNA).
Results will be saved in the self.capacity_sensitivity_coefficients attribute as a dataframe
"""
#split enzyme and mrna constraints
mu_mrna_max = mu_ec_f[mu_ec_f['rxn_id'].str.contains('mRNA')]
mu_ec_f = mu_ec_f[~mu_ec_f['rxn_id'].str.contains('mRNA')]

mu_mrna_min = mu_ec_b[mu_ec_b['rxn_id'].str.contains('mRNA')]
mu_ec_b = mu_ec_b[~mu_ec_b['rxn_id'].str.contains('mRNA')]

super().calculate_csc(obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b)

#TAModel specific constraints
if self.TOTAL_MRNA_CONSTRAINT_ID in self.constraints.keys():
constraint = 'mRNA'
rxn_id = self.TOTAL_MRNA_CONSTRAINT_ID
Expand All @@ -665,5 +675,30 @@ def calculate_csc(self, obj_value, mu, mu_ub, mu_lb, mu_ec_f, mu_ec_b):
new_row = [rxn_id, enzyme_id, constraint, ca_coefficient]
# add new_row to dataframe
self.capacity_sensitivity_coefficients.loc[len(self.capacity_sensitivity_coefficients)] = new_row
for transcript in self.transcripts:
self.calculate_mrna_csc(transcript, mu_mrna_min, mu_mrna_max, obj_value)

def calculate_mrna_csc(self, transcript:Transcript,
mu_mrna_min:pd.DataFrame, mu_mrna_max:pd.DataFrame,
obj_value:float) -> None:
"""
Calculate the capacity sensitivity coefficients (CSCs) for constraints related to transcripts. These coefficients
reflect the effect of infitesmal changes in the constraint bounds on the objective function. The coefficients
and associated reactions will be saved in the capacity_sensitivity_coefficients dataframe.
The function makes use of the abstracted function calculate_csc_for_molecule in the PAModel object
Args:
transcript:Transcript: transcript object to calculate CSC for
mu_mrna_min: pd.DataFrame: Shadowprices for the constraint related to the maximal number of
proteins which can be translated from a single mRNA.
mu_mrna_max: pd.DataFrame: Shadowprices for the constraint related to the minimal number of
proteins which can be translated from a single mRNA.
obj_value: float: optimal objective value, commonly maximal growth rate under specific conditions
"""
# only calculate csc if the transcript is actually associated with a constraint
if not transcript.id in mu_mrna_max.rxn_id: return

#TODO sensitivity analysis for transcripts
reactions = ','.join(
[','.join(self.get_reactions_with_enzyme_id(enzyme.id)) for enzyme in transcript.enzymes])
self.calculate_csc_for_molecule(transcript, mu_mrna_min, mu_mrna_max, obj_value, 'transcript', reactions)
Loading

0 comments on commit 134b24e

Please sign in to comment.