Skip to content

Commit

Permalink
ecolicore tam is feasible. Added and-or relationships
Browse files Browse the repository at this point in the history
  • Loading branch information
SamiralVdB committed Feb 27, 2024
1 parent 9822364 commit 8c1b5fd
Show file tree
Hide file tree
Showing 22 changed files with 1,173 additions and 216 deletions.

This file was deleted.

Binary file modified Data/TAModel/Sinha-etal_2021_transcript-data.xlsx
Binary file not shown.
Binary file modified Data/proteinAllocationModel_iML1515_EnzymaticData_230503.xls
Binary file not shown.

Large diffs are not rendered by default.

700 changes: 647 additions & 53 deletions Scripts/Translation_parameters_estimation.ipynb

Large diffs are not rendered by default.

Binary file modified Scripts/__pycache__/pam_generation.cpython-310.pyc
Binary file not shown.
Binary file modified Scripts/__pycache__/toy_ec_pam.cpython-310.pyc
Binary file not shown.
44 changes: 44 additions & 0 deletions Scripts/ecolicore_tam_incl_transcript_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pandas as pd
import os

from Scripts.tam_generation import set_up_toy_tam, set_up_ecolicore_tam
sinha_ref_conditions = {
'Holm et al': ['REF', 'NOX', 'ATP'], #WT and NADH overexpression conditions, mu 0.72, 0.65,0.58 h-1 respectively
'Ishii et al': ['WT_0.7h-1'],
'Gerosa et al.': ['Glycerol','Glucose','Acetate', 'Pyruvate','Gluconate','Succinate','Galactose','Fructose'],
}
TRANSCRIPT_FILE_PATH = os.path.join('Data', 'TAModel', 'Sinha-etal_2021_transcript-data.xlsx')


mrna_vs_mu_slope = 2.64E-10
mrna_vs_mu_intercept = 3.59E-11


def get_transcript_data(transcript_file_path:str = TRANSCRIPT_FILE_PATH, mmol = True,
reference: str = 'Holm et al', growth_rates = [0.72, 0.65,0.58]):
expression_data = pd.read_excel(transcript_file_path, sheet_name=reference, index_col=0)
# Normalize columns by dividing by the sum of each column
expression_data_normalized = expression_data.div(expression_data.sum(axis=0), axis=1)
if mmol:
mrna_conc = [mrna_vs_mu_intercept + mrna_vs_mu_slope*mu for mu in growth_rates]
expression_data_mmol = expression_data_normalized.apply(lambda row: row * mrna_conc, axis=1)
return expression_data_mmol
else:
return expression_data_normalized

if __name__ == '__main__':
tam = set_up_ecolicore_tam()
tam.optimize()
transcript_data_mmol = get_transcript_data()

for gene, expression_data in transcript_data_mmol.iterrows():
transcript_id = 'mRNA_'+gene
if not transcript_id in tam.transcripts: continue
transcript = tam.transcripts.get_by_id('mRNA_'+gene)
#testing wildtype condition
transcript.change_concentration(concentration=expression_data[0],
error= expression_data[0]*0.01)
print(tam.reactions.get_by_id('EX_glc__D_e'))

tam.optimize()
print(tam.summary())
287 changes: 182 additions & 105 deletions Scripts/pam_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import pandas as pd
import os
from typing import Union
import ast

# load PAMpy modules
from PAModelpy.PAModel import PAModel
from PAModelpy.EnzymeSectors import ActiveEnzymeSector, UnusedEnzymeSector, TransEnzymeSector
from PAModelpy.configuration import Config
from src.PAModelpy.PAModel import PAModel
from src.PAModelpy.EnzymeSectors import ActiveEnzymeSector, UnusedEnzymeSector, TransEnzymeSector
from src.PAModelpy.configuration import Config

from Scripts.toy_ec_pam import build_toy_gem, build_active_enzyme_sector, build_translational_protein_sector, build_unused_protein_sector

Expand All @@ -26,7 +27,7 @@ def set_up_toy_pam(sensitivity =True):
active_enzyme = build_active_enzyme_sector(config)
unused_enzyme = build_unused_protein_sector(config)
translation_enzyme = build_translational_protein_sector(config)
pamodel = PAModel(model, name='toy model MCA with enzyme constraints', active_sector=active_enzyme,
pamodel = PAModel(model, name='toy model with enzyme constraints', active_sector=active_enzyme,
translational_sector=translation_enzyme,
unused_sector=unused_enzyme, p_tot=Etot, sensitivity=sensitivity)
pamodel.objective = 'R7'
Expand All @@ -35,122 +36,33 @@ def set_up_toy_pam(sensitivity =True):

def set_up_ecolicore_pam(total_protein:bool = True, active_enzymes: bool = True, translational_enzymes:bool = True, unused_enzymes:bool = True, sensitivity =True):
# Setting the relative paths
DATA_DIR = os.path.join(os.path.split(os.getcwd())[0], 'Data')
MODEL_DIR = os.path.join(os.path.split(os.getcwd())[0], 'Models')
DATA_DIR = 'Data'
MODEL_DIR = 'Models'
PAM_DATA_FILE_PATH = os.path.join(DATA_DIR, 'proteinAllocationModel_iML1515_EnzymaticData_py.xls')
TAM_DATA_FILE_PATH = os.path.join(DATA_DIR, 'TAModel','2024-02-16_gene_enzyme_reaction_relation_Ecoli.xlsx')

# some other constants
BIOMASS_REACTION = 'BIOMASS_Ecoli_core_w_GAM'
TOTAL_PROTEIN_CONCENTRATION = 0.16995 # [g_prot/g_cdw]

config = Config()
config.reset()
config.BIOMASS_REACTION = BIOMASS_REACTION

# load the genome-scale information
model = cobra.io.load_json_model(os.path.join(MODEL_DIR, 'e_coli_core.json'))

#load example data for the E.coli iML1515 model
if active_enzymes:
# load active enzyme sector information
enzyme_db = pd.read_excel(PAM_DATA_FILE_PATH, sheet_name='ActiveEnzymes')
enzyme_db = enzyme_db.set_index('rxnID')
# correct reaction IDS
for idx in enzyme_db.index.to_list():
# transprt reactions<

if 'pp' in idx:
idx_new = idx.replace('pp', '')
if idx_new not in enzyme_db.index:
enzyme_db.rename(index={idx: idx_new}, inplace=True)
if 'ex' in idx:
idx_new = idx.replace('ex', '')
if idx_new not in enzyme_db.index:
enzyme_db.rename(index={idx: idx_new}, inplace=True)

# replace NaN values with unique identifiers
# replace NaN enzyme ids with a dummy enzyme identifier
# select the NaN values
nan_values = enzyme_db['EC_nmbr'].isnull()
# make a list with unique ids
nan_ids = [f'E{i}' for i in range(nan_values.sum())]
# replace nan values by unique id
enzyme_db.loc[nan_values, 'EC_nmbr'] = nan_ids
enzyme_db = _parse_enzyme_information_from_file(TAM_DATA_FILE_PATH)

# create enzyme objects for each gene-associated reaction
kcats = {}
rxn2ec = {}
molmass = {}
for rxn in model.reactions:
if rxn.genes:
# correct transport reactions
if 't' in rxn.id:
rxn.id = rxn.id
# are enzyme information in the PAM database?
rev = 0 # denotes reversibility
if rxn.lower_bound >= 0:
# irreversible reaction (forward direction)
rev = 0
rxn_id = rxn.id # save reaction ID for retrieveing molar masses/enzyme information later
if rxn.id in enzyme_db.index:
kcats[rxn.id] = {'f': enzyme_db.loc[rxn.id, 'kcat']}
elif rxn.upper_bound <= 0:
# irreversible reaction (reverse direction)
rev = 1
rxn_id = rxn.id + '_b'
if rxn_id in enzyme_db.index:
kcats[rxn.id] = {'b': enzyme_db.loc[rxn_id, 'kcat']}
else:
rev = 2
# reversible reaction
rxn_id_f = rxn.id + '_f'
rxn_id_b = rxn.id + '_b'
if rxn_id_f in enzyme_db.index and rxn_id_b in enzyme_db.index:
rxn_id = rxn_id_f # save reaction ID for retrieveing molar masses/enzyme information later
kcats[rxn.id] = {'f': enzyme_db.loc[rxn_id_f, 'kcat'],
'b': enzyme_db.loc[rxn_id_b, 'kcat']}

else:
# try if only forward reaction is in database
rxn_id = rxn.id # save reaction ID for retrieveing molar masses/enzyme information later
kcats[rxn.id] = {'f': enzyme_db.loc[rxn.id, 'kcat'],
'b': enzyme_db.loc[
rxn.id, 'kcat'] / 2} # deduce backwards kcat from forward value

# where enzyme information found?
if rxn.id in kcats.keys():
# save molmass
molmass[rxn.id] = enzyme_db.loc[rxn_id, 'molMass']
# save enzyme information
# is enzyme information NaN?
if pd.isna(enzyme_db.loc[rxn_id, 'EC_nmbr']):
rxn2ec[rxn.id] = ''
else:
rxn2ec[rxn.id] = enzyme_db.loc[rxn_id, 'EC_nmbr']


else:
# no enzyme information found
print('No enzyme information found for reaction: ' + rxn.id)
# Create generic Enzyme with mean molar masses and kcat
if rev == 0:
kcats[rxn.id] = {'f': 22}
elif rev == 1:
kcats[rxn.id] = {'b': 22}
else:
kcats[rxn.id] = {'f': 22, 'b': 22}

molmass[rxn.id] = 3.947778784340140e04

rxn2protein = {}
for rxn, ec in rxn2ec.items():
ec_dict = {**kcats[rxn], **{'molmass': molmass[rxn]}}
# add enzyme to enzymes related to reaction if these are already stored
if rxn in rxn2protein.keys():
rxn2protein[rxn] = {**rxn2protein[rxn], **{ec: ec_dict}}
# if not create new reaction entry
else:
rxn2protein[rxn] = {ec: ec_dict}
rxn2protein, protein2gene, gene2transcript = parse_reaction2protein(enzyme_db, model)

# create active enzymes sector
active_enzyme_sector = ActiveEnzymeSector(rxn2protein=rxn2protein)

active_enzyme_sector = ActiveEnzymeSector(rxn2protein=rxn2protein,
protein2gene=protein2gene,
configuration=config)
else:
active_enzyme_sector = None

Expand Down Expand Up @@ -340,3 +252,168 @@ def parse_coefficients(pamodel):
def parse_esc(pamodel):
return pamodel.enzyme_sensitivity_coefficients.coefficient.to_list()


def _parse_enzyme_information_from_file(file_path:str):
# load active enzyme sector information
enzyme_db = pd.read_excel(file_path, sheet_name='enzyme-gene-reaction')
# enzyme_db = enzyme_db.set_index('rxnID')
# correct reaction IDS
for idx in enzyme_db.rxnID.to_list():
# transport reactions
if 'pp' in idx:
idx_new = idx.replace('pp', '')
if idx_new not in enzyme_db.index:
enzyme_db.rename(index={idx: idx_new}, inplace=True)
if 'ex' in idx:
idx_new = idx.replace('ex', '')
if idx_new not in enzyme_db.index:
enzyme_db.rename(index={idx: idx_new}, inplace=True)

# replace NaN values with unique identifiers
# replace NaN enzyme ids with a dummy enzyme identifier
# select the NaN values
nan_values = enzyme_db['EC_nmbr'].isnull()
# make a list with unique ids
nan_ids = [f'E{i}' for i in range(nan_values.sum())]
# replace nan values by unique id
enzyme_db.loc[nan_values, 'EC_nmbr'] = nan_ids

return enzyme_db


def parse_reaction2protein(enzyme_db: pd.DataFrame, model:cobra.Model) -> dict:
# Initialize dictionaries
rxn2protein = {}
protein2gene = {}
gene2transcript = {'gene_dummy': {'id': 'mRNA_gene_dummy', 'length': 750.0}}

# Iterate over each row in the DataFrame
for index, row in enzyme_db.iterrows():
# Parse data from the row
rxn_id = row['rxnID']
rxn_id = _check_rxn_identifier_format(rxn_id)
#only parse those reactions which are in the model
kcat_f_b = _get_fwd_bckw_kcat(rxn_id, row['kcat'], model)
if kcat_f_b is None: continue
#check if there is a forward or backward string which needs to be removed from the rxn id
if any([rxn_id.endswith('_f'), rxn_id.endswith('_b')]): rxn_id = rxn_id[:-2]

#get the identifiers and replace nan values by dummy placeholders
enzyme_id = row['EC_nmbr']
if not isinstance(enzyme_id, str): enzyme_id = 'Enzyme_'+rxn_id
gene_id = row['gene_id']
if not isinstance(gene_id, str): gene_id = 'gene_dummy'
mrna_length = row['mrna_length']

#get the gene-protein-reaction-associations
gpr = parse_gpr_information_for_protein2genes(row['gpr'])

# Create gene-protein-reaction associations
if enzyme_id not in protein2gene:
protein2gene[enzyme_id] = []
protein2gene[enzyme_id].append(gpr)

# Create gene2transcript dictionary
if gene_id not in gene2transcript.keys():
gene2transcript[gene_id] = {'id': f'mRNA_{gene_id}', 'length': mrna_length}

# Create rxn2protein dictionary
if rxn_id not in rxn2protein:
rxn2protein[rxn_id] = {}
if enzyme_id not in rxn2protein[rxn_id]:
rxn2protein[rxn_id][enzyme_id] = {
'f': kcat_f_b[0], # Forward kcat
'b': kcat_f_b[1], # Backward kcat
'molmass': row['molMass'],
'genes': [gene_id],
'complex_with': None
}
else:
rxn2protein[rxn_id][enzyme_id]['genes'].append(gene_id)

#if no enzyme info is found, add dummy enzyme with median kcat and molmass
for rxn in model.reactions:
if rxn.id not in rxn2protein.keys() and 'EX' not in rxn.id and 'BIOMASS' not in rxn.id and rxn.genes:
rev = _check_reaction_reversibility(rxn)
if rev == 0:
kcat_dict = {'f': 22}
elif rev ==1:
kcat_dict = {'b': 22}
else:
kcat_dict = {'f': 22,'b': 22}
# no enzyme information found
print('No enzyme information found for reaction: ' + rxn.id)
enzyme_id = 'Enzyme_'+rxn.id
rxn2protein[rxn.id] = {enzyme_id:{
**kcat_dict,
'molmass': 3.947778784340140e04}}
#add geneinfo for unknown enzymes
gpr_info = parse_gpr_information_for_protein2genes(rxn.gpr)
protein2gene[enzyme_id] = gpr_info

for gene in [gene for sublist in gpr_info for gene in sublist]:
if gene not in gene2transcript.keys():
gene2transcript[gene] = {'id': f'mRNA_{gene}', 'length': 750.0}

return rxn2protein, protein2gene, gene2transcript

def _check_rxn_identifier_format(rxn_id:str) -> str:
if 'pp' in rxn_id:
idx_new = rxn_id.replace('pp', '')
elif 'ex' in rxn_id:
idx_new = rxn_id.replace('ex', '')
else:
idx_new = rxn_id
return idx_new

def _get_fwd_bckw_kcat(rxn_id: str, kcat:float, model:PAModel) -> Union[list, None]:
#skip exchange and biomass reaction
if 'EX' in rxn_id or 'BIOMASS' in rxn_id:
return None

# Extract the base identifier without any suffixes
base_id = rxn_id.split('_')[0]

# Iterate over each identifier in the input
if base_id in model.reactions:
# Determine the form of the identifier
if rxn_id.endswith('_f'):
kcat_fwd = kcat
kcat_rev = 0
elif rxn_id.endswith('_b'):
kcat_fwd = 0
kcat_rev = kcat
# identifier has no suffix
elif base_id == rxn_id:
kcat_fwd = kcat
kcat_rev = kcat
else:
return None
elif rxn_id in model.reactions:
kcat_fwd = kcat
kcat_rev = kcat
else:
return None
return [kcat_fwd, kcat_rev]

def _check_reaction_reversibility(reaction):
if reaction.lower_bound >= 0:
# irreversible reaction (forward direction)
rev = 0
elif reaction.upper_bound <= 0:
# irreversible reaction (reverse direction)
rev = 1
else:
rev = 2
# reversible r
return rev

def parse_gpr_information_for_protein2genes(gpr_info:str):
#filter out nan entries
if isinstance(gpr_info, str):
nested_list = ast.literal_eval(gpr_info)
# Extracting the inner lists and removing parentheses
gpr_list = [[[item.strip("(')")] + sublist[1:] for item in sublist[0].split(" or ")] for sublist in nested_list]
return gpr_list[0]
else:
return [['gene_dummy']]
Loading

0 comments on commit 8c1b5fd

Please sign in to comment.