Skip to content

Commit

Permalink
integrated transcript data
Browse files Browse the repository at this point in the history
  • Loading branch information
SamiralVdB committed Feb 28, 2024
1 parent 8c1b5fd commit 146fe83
Show file tree
Hide file tree
Showing 13 changed files with 868 additions and 197 deletions.
1 change: 1 addition & 0 deletions Data/TAModel/.~lock.Sinha-etal_2021_transcript-data.xlsx#
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
,samiralvdb,QPE-IAMBPC156,28.02.2024 17:17,file:///home/samiralvdb/.config/libreoffice/4;
Binary file not shown.
Binary file added Data/TAModel/GeneList_ecoli.xlsx
Binary file not shown.
Binary file added Data/TAModel/Sinha-etal_2021_flux-data.xlsx
Binary file not shown.
285 changes: 208 additions & 77 deletions Scripts/.ipynb_checkpoints/create_ecolicore_pam_incl_UE-checkpoint.ipynb

Large diffs are not rendered by default.

285 changes: 208 additions & 77 deletions Scripts/create_ecolicore_pam_incl_UE.ipynb

Large diffs are not rendered by default.

129 changes: 118 additions & 11 deletions Scripts/ecolicore_tam_incl_transcript_info.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import matplotlib.pyplot as plt
import pandas as pd
import os

from Scripts.tam_generation import set_up_toy_tam, set_up_ecolicore_tam
from Scripts.pam_generation import set_up_ecolicore_pam

# Condition names grouped per reference study. The 'Holm et al' key matches
# the default sheet name that get_flux_data reads from the flux workbook;
# presumably every key is a sheet name and every value lists that sheet's
# condition columns -- TODO confirm against the workbook.
sinha_ref_conditions = {
'Holm et al': ['REF', 'NOX', 'ATP'], #WT and NADH overexpression conditions, mu 0.72, 0.65,0.58 h-1 respectively
'Ishii et al': ['WT_0.7h-1'],
'Gerosa et al.': ['Glycerol','Glucose','Acetate', 'Pyruvate','Gluconate','Succinate','Galactose','Fructose'],
}
# Workbook locations, built as relative paths (so they resolve against the
# current working directory).
TRANSCRIPT_FILE_PATH = os.path.join('Data', 'TAModel', 'Sinha-etal_2021_transcript-data.xlsx')
FLUX_FILE_PATH = os.path.join('Data', 'TAModel', 'Sinha-etal_2021_flux-data.xlsx')


mrna_vs_mu_slope = 2.64E-10
Expand All @@ -26,19 +30,122 @@ def get_transcript_data(transcript_file_path:str = TRANSCRIPT_FILE_PATH, mmol =
else:
return expression_data_normalized

if __name__ == '__main__':
def get_flux_data(flux_file_path:str = FLUX_FILE_PATH,
                  reference: str = 'Holm et al'):
    """Read the flux table of one reference study from the flux workbook.

    Args:
        flux_file_path: path to the Excel workbook holding the flux data.
        reference: name of the sheet to read.

    Returns:
        pandas.DataFrame with the sheet's first column as index. For the
        'Holm et al' sheet a leading ``R_`` is stripped from every index label.
    """
    flux_data = pd.read_excel(flux_file_path, sheet_name=reference, index_col=0)
    if reference == 'Holm et al':
        # Remove the 'R_' *prefix* only. The anchored pattern keeps identifiers
        # that happen to contain 'R_' further on (e.g. '..R_e') intact, and the
        # explicit regex flag makes the call independent of the pandas default.
        flux_data.index = flux_data.index.str.replace(r'^R_', '', regex=True)
    return flux_data

def get_pam_fluxes(substrate_uptake_rate):
    """Optimize a fresh E. coli core PAM at the given glucose uptake bound.

    The 'EX_glc__D_e' exchange is bounded to [-substrate_uptake_rate, 0]
    before optimizing; the ``fluxes`` attribute of the solution is returned.
    """
    model = set_up_ecolicore_pam()
    model.change_reaction_bounds(
        'EX_glc__D_e',
        lower_bound=-substrate_uptake_rate,
        upper_bound=0,
    )
    solution = model.optimize()
    return solution.fluxes

def set_up_tamodel(strain ='REF'):
    """Build the E. coli core TAM and constrain it with transcript data.

    Args:
        strain: column of the transcript table whose values are applied
            (e.g. 'REF' or 'NOX').

    Returns:
        The TAM with the glucose exchange opened up and, for every gene that
        has an 'mRNA_<gene>' transcript in the model, the transcript
        concentration set to the measured value with a 1 % error margin.
    """
    tam = set_up_ecolicore_tam()
    tam.optimize()
    tam.change_reaction_bounds('EX_glc__D_e', lower_bound=-1e6, upper_bound=0)
    transcript_data_mmol = get_transcript_data()

    for gene, expression_data in transcript_data_mmol.iterrows():
        transcript_id = 'mRNA_' + gene
        # Genes without a transcript in the model cannot be constrained.
        if transcript_id not in tam.transcripts:
            continue
        concentration = expression_data[strain]
        transcript = tam.transcripts.get_by_id(transcript_id)
        transcript.change_concentration(concentration=concentration,
                                        error=concentration * 0.01)
    return tam

def get_tam_fluxes(tam,substrate_uptake_rate):
    """Optimize ``tam`` at the given glucose uptake bound and return its fluxes.

    The 'EX_glc__D_e' exchange of the passed model is bounded to
    [-substrate_uptake_rate, 0] (the model is modified in place).
    """
    tam.change_reaction_bounds(
        'EX_glc__D_e',
        lower_bound=-substrate_uptake_rate,
        upper_bound=0,
    )
    return tam.optimize().fluxes

def _model_reaction_id(rxn):
    """Translate a reaction id from the measured data to the model's id."""
    if 'pp' in rxn: rxn = rxn.replace('pp', '')
    if 'biomass' in rxn: rxn = 'BIOMASS_Ecoli_core_w_GAM'
    if 'EX_glc' in rxn: rxn = 'EX_glc__D_e'
    if 'EX_ac' in rxn: rxn = 'EX_ac_e'
    return rxn


def compare_flux_data(flux_data, pam_fluxes, tam_fluxes, strain ='REF', abs=True):
    """Tabulate measured fluxes next to the PAM and TAM predictions.

    Args:
        flux_data: DataFrame of measured fluxes, indexed by reaction id, with
            one column per strain/condition.
        pam_fluxes: mapping (e.g. Series) from model reaction id to PAM flux.
        tam_fluxes: mapping (e.g. Series) from model reaction id to TAM flux.
        strain: column of ``flux_data`` to compare against.
        abs: if True the fluxes are reported as they are; if False every flux
            is divided by the glucose uptake flux of its own data set
            ('GLCptspp' in the measurements, 'GLCpts' in the models).
            (The name shadows the builtin but is kept for existing callers.)

    Returns:
        DataFrame with the columns ``strain`` (original measurements),
        'strain' (measurements, possibly normalised), 'PAM' and 'TAM'.
        The table is also printed.

    Raises:
        KeyError: if a measured reaction has no counterpart in the model fluxes.
    """
    if abs:
        glc_upt_ref = glc_upt_pam = glc_upt_tam = 1
    else:
        glc_upt_ref = flux_data[strain]['GLCptspp']
        glc_upt_pam = pam_fluxes['GLCpts']
        glc_upt_tam = tam_fluxes['GLCpts']

    # The keyword below creates a column literally called 'strain'.
    flux_results_percentage = flux_data[[strain]].assign(
        strain=lambda val: val[strain] / glc_upt_ref)

    # Build the model columns in one go instead of writing cell by cell via
    # chained indexing (df['PAM'][rxn] = ...), which pandas does not guarantee
    # to write through and which is a no-op under copy-on-write.
    model_ids = [_model_reaction_id(rxn) for rxn in flux_data.index]
    flux_results_percentage['PAM'] = [pam_fluxes[rxn] / glc_upt_pam for rxn in model_ids]
    flux_results_percentage['TAM'] = [tam_fluxes[rxn] / glc_upt_tam for rxn in model_ids]

    try:
        print(flux_results_percentage.to_markdown())
    except ImportError:
        # to_markdown needs the optional 'tabulate' package; the comparison
        # itself should not fail just because pretty printing is unavailable.
        print(flux_results_percentage.to_string())
    return flux_results_percentage

def compare_fluxes_holm_reference(strain = 'REF'):
    """Compare PAM and TAM predictions with the measured fluxes of one strain.

    Both models are simulated at the measured 'GLCptspp' flux of ``strain``.
    Rows of the TAM capacity sensitivity coefficients with a positive
    coefficient are printed, then the relative and absolute comparison tables
    are produced and plotted.
    """
    measured = get_flux_data()
    glucose_uptake = measured[strain]['GLCptspp']

    pam_fluxes = get_pam_fluxes(substrate_uptake_rate=glucose_uptake)

    tam = set_up_tamodel(strain)
    tam_fluxes = get_tam_fluxes(tam, substrate_uptake_rate=glucose_uptake)

    # Only report constraints that actually limit the solution.
    for _, csc_row in tam.capacity_sensitivity_coefficients.iterrows():
        if not (csc_row.coefficient > 0):
            continue
        print(csc_row)

    relative = compare_flux_data(measured, pam_fluxes, tam_fluxes, strain, abs=False)
    absolute = compare_flux_data(measured, pam_fluxes, tam_fluxes, strain)

    plot_flux_comparison(absolute, relative, strain)

def plot_flux_comparison(flux_df_abs, flux_df_rel, strain):
    """Scatter simulated against measured fluxes, absolute and relative.

    Args:
        flux_df_abs: comparison table with absolute fluxes; needs the columns
            'strain' (measured), 'PAM' and 'TAM'.
        flux_df_rel: same layout, with fluxes normalised to glucose uptake.
        strain: strain name, used in the panel titles.

    The left panel shows the absolute, the right panel the relative fluxes.
    TAM points are black, PAM points red, and the dashed line marks perfect
    agreement (simulated == measured). The figure is shown with ``plt.show()``.
    """
    fig, ax = plt.subplots(1, 2, figsize=(20, 10))

    # Relative fluxes are divided by the glucose uptake flux and therefore
    # dimensionless; only the absolute panel carries the mmol/gCDW/h unit.
    panels = (
        (ax[0], flux_df_abs, ' absolute fluxes',
         'simulated flux [$mmol/g_{CDW}/h$]', 'measured flux [$mmol/g_{CDW}/h$]'),
        (ax[1], flux_df_rel, ' relative fluxes',
         'simulated flux relative to glucose uptake [-]',
         'measured flux relative to glucose uptake [-]'),
    )
    for axis, flux_df, title_suffix, xlabel, ylabel in panels:
        axis.scatter(flux_df['TAM'], flux_df['strain'], color='black', label='TAM')
        axis.scatter(flux_df['PAM'], flux_df['strain'], color='red', label='PAM')
        # reference line
        axis.plot(flux_df['strain'], flux_df['strain'], linestyle='dashed')

        axis.set_title(strain + title_suffix)
        axis.set_xlabel(xlabel)
        axis.set_ylabel(ylabel)
        # Per-axes legend: plt.legend() would only reach the last panel.
        axis.legend()

    plt.show()


if __name__ == '__main__':
    # Wild type first, then the NADH oxidase overexpression strain.
    print('Reference condition')
    compare_fluxes_holm_reference()

    print('\n-------------------------------------------------------------------------------------------------')
    print('mutation 1: NOX strain (overexpression of NADH oxidase)\n')
    compare_fluxes_holm_reference(strain='NOX')

    # TODO print mRNA and protein concentrations to compare with lb
    # TODO print shadowprices of mRNA (are lbs hit? how far can I constrain?)






4 changes: 3 additions & 1 deletion Scripts/pam_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def set_up_ecolicore_pam(total_protein:bool = True, active_enzymes: bool = True,
DATA_DIR = 'Data'
MODEL_DIR = 'Models'
PAM_DATA_FILE_PATH = os.path.join(DATA_DIR, 'proteinAllocationModel_iML1515_EnzymaticData_py.xls')
TAM_DATA_FILE_PATH = os.path.join(DATA_DIR, 'TAModel','2024-02-16_gene_enzyme_reaction_relation_Ecoli.xlsx')
TAM_DATA_FILE_PATH = os.path.join(DATA_DIR, 'TAModel','2024-02-27_gene_enzyme_reaction_relation_Ecoli.xlsx')

# some other constants
BIOMASS_REACTION = 'BIOMASS_Ecoli_core_w_GAM'
Expand Down Expand Up @@ -376,6 +376,7 @@ def _get_fwd_bckw_kcat(rxn_id: str, kcat:float, model:PAModel) -> Union[list, No

# Iterate over each identifier in the input
if base_id in model.reactions:
if not model.reactions.get_by_id(base_id).genes: return None
# Determine the form of the identifier
if rxn_id.endswith('_f'):
kcat_fwd = kcat
Expand All @@ -390,6 +391,7 @@ def _get_fwd_bckw_kcat(rxn_id: str, kcat:float, model:PAModel) -> Union[list, No
else:
return None
elif rxn_id in model.reactions:
if not model.reactions.get_by_id(rxn_id).genes: return None
kcat_fwd = kcat
kcat_rev = kcat
else:
Expand Down
13 changes: 13 additions & 0 deletions Scripts/parse_ecoli_gpr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,13 @@ def parse_gpr_relationships_from_Ecocyc():
'Molecular-Weight-KiloDaltons', 'mrna_length', 'gpr']]
enzyme_gene_reaction_relation.columns = ['Gene', 'Enzyme', 'Reaction', 'gene_id', 'enzyme_id',
'molmass_kDa', 'mrna_length', 'gpr']

#Get enzyme and kcat information already available
tam_info_merged = parse_enzymatic_data_information(enzyme_gene_reaction_relation)

#Append the information of each gene with information from the 'GeneList'
tam_info_merged = parse_ecoli_genome_information(tam_info_merged)

#write to excel
with pd.ExcelWriter(TAM_DATA_FILE) as writer:
tam_info_merged.to_excel(writer, sheet_name='enzyme-gene-reaction')
Expand Down Expand Up @@ -82,7 +87,15 @@ def parse_enzymatic_data_information(enzyme_gene_reaction_relation):
tam_info = tam_info.drop(['molmass_kDa', 'Reaction'], axis = 1)
return tam_info

def parse_ecoli_genome_information(tam_info_merged, genome_information=None):
    """Fill 'mrna_length' from the start/end coordinates of the gene list.

    Args:
        tam_info_merged: DataFrame with a 'gene_id' column holding b-numbers.
        genome_information: optional gene table with 'start' and 'end'
            columns, either indexed by b-number or with a 'bnumber' column.
            When omitted, the 'GeneList' sheet of
            Data/TAModel/GeneList_ecoli.xlsx is read.

    Returns:
        A copy of ``tam_info_merged`` in which 'mrna_length' is replaced by
        ``|start - end|`` for every gene found in the gene table. Rows whose
        gene is not listed keep their existing 'mrna_length' (NaN if the
        column did not exist). The input frame is left unchanged.
    """
    if genome_information is None:
        genome_information = pd.read_excel(os.path.join('Data', 'TAModel','GeneList_ecoli.xlsx'),
                                           sheet_name='GeneList')
    if 'bnumber' in genome_information.columns:
        genome_information = genome_information.set_index('bnumber')

    # Absolute difference: start - end is negative whenever start < end.
    # NOTE(review): if the coordinates are inclusive the length is one larger
    # than this difference -- confirm against the gene list before relying on it.
    gene_lengths = (genome_information['start'] - genome_information['end']).abs()
    # A lookup needs unique keys; on repeated b-numbers the last entry wins,
    # mirroring what a row-by-row overwrite would do.
    gene_lengths = gene_lengths[~gene_lengths.index.duplicated(keep='last')]

    # Assigning to a filtered slice (df[df.gene_id == x]['col'] = ...) only
    # changes a temporary copy, so map the lengths onto the column instead.
    updated = tam_info_merged.copy()
    new_lengths = updated['gene_id'].map(gene_lengths)
    if 'mrna_length' in updated.columns:
        new_lengths = new_lengths.where(new_lengths.notna(),
                                        updated['mrna_length'].to_numpy())
    updated['mrna_length'] = new_lengths
    return updated

# Script entry point: run the EcoCyc GPR parsing when executed directly.
if __name__ == '__main__':
    parse_gpr_relationships_from_Ecocyc()
6 changes: 4 additions & 2 deletions Scripts/tam_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,14 @@ def set_up_ecolicore_tam(total_protein:bool = True, active_enzymes: bool = True,
# Setting the relative paths
DATA_DIR = os.path.join('Data')
MODEL_DIR = os.path.join('Models')
TAM_DATA_FILE_PATH = os.path.join(DATA_DIR, 'TAModel','2024-02-16_gene_enzyme_reaction_relation_Ecoli.xlsx')
TAM_DATA_FILE_PATH = os.path.join(DATA_DIR, 'TAModel','2024-02-27_gene_enzyme_reaction_relation_Ecoli.xlsx')


# some other constants
BIOMASS_REACTION = 'BIOMASS_Ecoli_core_w_GAM'
TOTAL_PROTEIN_CONCENTRATION = 0.16995 # [g_prot/g_cdw]
# TOTAL_PROTEIN_CONCENTRATION = 0.16995 # [g_prot/g_cdw]
TOTAL_PROTEIN_CONCENTRATION = 0.185 # [g_prot/g_cdw]

MRNA_MU = 0.00013049558330984208 # [g_mrna/g_cdw/h]
MRNA_0= 1.7750480089801658e-05 # [g_mrna/g_cdw]

Expand Down
Loading

0 comments on commit 146fe83

Please sign in to comment.