Skip to content

Commit

Permalink
Changed gene-protein-reaction association parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
SamiralVdB committed Jul 22, 2024
1 parent d81a39d commit 16d4d22
Show file tree
Hide file tree
Showing 8 changed files with 636 additions and 651 deletions.
Empty file added Scripts/__init__.py
Empty file.
467 changes: 287 additions & 180 deletions Scripts/pam_generation.py

Large diffs are not rendered by default.

452 changes: 0 additions & 452 deletions Scripts/pam_generation_new.py

This file was deleted.

330 changes: 330 additions & 0 deletions Scripts/pam_generation_old.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,330 @@
import cobra
import pandas as pd
import os
from typing import Union

# load PAMpy modules
from PAModelpy.PAModel import PAModel
from PAModelpy.EnzymeSectors import ActiveEnzymeSector, UnusedEnzymeSector, TransEnzymeSector
from PAModelpy.configuration import Config

from Scripts.toy_ec_pam import build_toy_gem, build_active_enzyme_sector, build_translational_protein_sector, build_unused_protein_sector

'Function library for making Protein Allocation Models as described in the publication'


def set_up_toy_pam(sensitivity =True):
config = Config()
#setting the configuration for the toy model
config.BIOMASS_REACTION = 'R7'
config.GLUCOSE_EXCHANGE_RXNID = 'R1'
config.CO2_EXHANGE_RXNID = 'R8'
config.ACETATE_EXCRETION_RXNID = 'R9'

Etot = 0.6*1e-3
model = build_toy_gem()
active_enzyme = build_active_enzyme_sector(config)
unused_enzyme = build_unused_protein_sector(config)
translation_enzyme = build_translational_protein_sector(config)
pamodel = PAModel(model, name='toy model MCA with enzyme constraints', active_sector=active_enzyme,
translational_sector=translation_enzyme,
unused_sector=unused_enzyme, p_tot=Etot, sensitivity=sensitivity)
pamodel.objective = 'R7'
config.reset()
return pamodel

def set_up_ecolicore_pam(total_protein:bool = True, active_enzymes: bool = True, translational_enzymes:bool = True, unused_enzymes:bool = True, sensitivity =True):
# Setting the relative paths
DATA_DIR = os.path.join(os.path.split(os.getcwd())[0], 'Data')
MODEL_DIR = os.path.join(os.path.split(os.getcwd())[0], 'Models')
PAM_DATA_FILE_PATH = os.path.join(DATA_DIR, 'proteinAllocationModel_iML1515_EnzymaticData_py.xls')

# some other constants
BIOMASS_REACTION = 'BIOMASS_Ecoli_core_w_GAM'
TOTAL_PROTEIN_CONCENTRATION = 0.16995 # [g_prot/g_cdw]

# load the genome-scale information
model = cobra.io.load_json_model(os.path.join(MODEL_DIR, 'e_coli_core.json'))

#load example data for the E.coli iML1515 model
if active_enzymes:
# load active enzyme sector information
enzyme_db = pd.read_excel(PAM_DATA_FILE_PATH, sheet_name='ActiveEnzymes')
enzyme_db = enzyme_db.set_index('rxnID')
# correct reaction IDS
for idx in enzyme_db.index.to_list():
# transprt reactions<

if 'pp' in idx:
idx_new = idx.replace('pp', '')
if idx_new not in enzyme_db.index:
enzyme_db.rename(index={idx: idx_new}, inplace=True)
if 'ex' in idx:
idx_new = idx.replace('ex', '')
if idx_new not in enzyme_db.index:
enzyme_db.rename(index={idx: idx_new}, inplace=True)

# replace NaN values with unique identifiers
# replace NaN enzyme ids with a dummy enzyme identifier
# select the NaN values
nan_values = enzyme_db['EC_nmbr'].isnull()
# make a list with unique ids
nan_ids = [f'E{i}' for i in range(nan_values.sum())]
# replace nan values by unique id
enzyme_db.loc[nan_values, 'EC_nmbr'] = nan_ids

# create enzyme objects for each gene-associated reaction
kcats = {}
rxn2ec = {}
molmass = {}
for rxn in model.reactions:
if rxn.genes:
# correct transport reactions
if 't' in rxn.id:
rxn.id = rxn.id
# are enzyme information in the PAM database?
rev = 0 # denotes reversibility
if rxn.lower_bound >= 0:
# irreversible reaction (forward direction)
rev = 0
rxn_id = rxn.id # save reaction ID for retrieveing molar masses/enzyme information later
if rxn.id in enzyme_db.index:
kcats[rxn.id] = {'f': enzyme_db.loc[rxn.id, 'kcat']}
elif rxn.upper_bound <= 0:
# irreversible reaction (reverse direction)
rev = 1
rxn_id = rxn.id + '_b'
if rxn_id in enzyme_db.index:
kcats[rxn.id] = {'b': enzyme_db.loc[rxn_id, 'kcat']}
else:
rev = 2
# reversible reaction
rxn_id_f = rxn.id + '_f'
rxn_id_b = rxn.id + '_b'
if rxn_id_f in enzyme_db.index and rxn_id_b in enzyme_db.index:
rxn_id = rxn_id_f # save reaction ID for retrieveing molar masses/enzyme information later
kcats[rxn.id] = {'f': enzyme_db.loc[rxn_id_f, 'kcat'],
'b': enzyme_db.loc[rxn_id_b, 'kcat']}

else:
# try if only forward reaction is in database
rxn_id = rxn.id # save reaction ID for retrieveing molar masses/enzyme information later
kcats[rxn.id] = {'f': enzyme_db.loc[rxn.id, 'kcat'],
'b': enzyme_db.loc[
rxn.id, 'kcat'] / 2} # deduce backwards kcat from forward value

# where enzyme information found?
if rxn.id in kcats.keys():
# save molmass
molmass[rxn.id] = enzyme_db.loc[rxn_id, 'molMass']
# save enzyme information
# is enzyme information NaN?
if pd.isna(enzyme_db.loc[rxn_id, 'EC_nmbr']):
rxn2ec[rxn.id] = ''
else:
rxn2ec[rxn.id] = enzyme_db.loc[rxn_id, 'EC_nmbr']


else:
# no enzyme information found
print('No enzyme information found for reaction: ' + rxn.id)
# Create generic Enzyme with mean molar masses and kcat
if rev == 0:
kcats[rxn.id] = {'f': 22}
elif rev == 1:
kcats[rxn.id] = {'b': 22}
else:
kcats[rxn.id] = {'f': 22, 'b': 22}

molmass[rxn.id] = 3.947778784340140e04

rxn2protein = {}
for rxn, ec in rxn2ec.items():
ec_dict = {**kcats[rxn], **{'molmass': molmass[rxn]}}
# add enzyme to enzymes related to reaction if these are already stored
if rxn in rxn2protein.keys():
rxn2protein[rxn] = {**rxn2protein[rxn], **{ec: ec_dict}}
# if not create new reaction entry
else:
rxn2protein[rxn] = {ec: ec_dict}

# create active enzymes sector
active_enzyme_sector = ActiveEnzymeSector(rxn2protein=rxn2protein)

else:
active_enzyme_sector = None

if translational_enzymes:
# translational protein sector parameter (substrate dependent)
id_list_tps = ['EX_glc__D_e']
tps_0 = [0.04992] # g/gDW
tps_mu = [-0.002944] # g h/gDW -> transformed to match glucose uptake variable
molmass_tps = [405903.94] # g/mol

# translational protein sector
translation_enzyme_sector = TransEnzymeSector(
id_list=id_list_tps,
tps_0=tps_0,
tps_mu=tps_mu,
mol_mass=molmass_tps,
)
else:
translation_enzyme_sector = None

if unused_enzymes:
id_list_ups = [BIOMASS_REACTION]
ups_0 = [0.0407] # g/gDW
ups_mu = [-0.0214] # g h/gDW -> negative relation with growth rate
molmass_ups = [405903.94] # g/mol

unused_enzyme_sector = UnusedEnzymeSector(
id_list=id_list_ups,
ups_0=ups_0,
ups_mu=ups_mu,
mol_mass=molmass_ups,
)
else:
unused_enzyme_sector = None

if total_protein: total_protein = TOTAL_PROTEIN_CONCENTRATION

pa_model = PAModel(id_or_model=model, p_tot=total_protein, sensitivity=sensitivity,
active_sector=active_enzyme_sector, translational_sector=translation_enzyme_sector, unused_sector=unused_enzyme_sector)
return pa_model

def set_up_ecoli_pam(total_protein: Union[bool, float] = True, active_enzymes: bool = True,
translational_enzymes: bool = True, unused_enzymes: bool = True, sensitivity = True):

config = Config()
config.reset()
# Setting the relative paths
cwd = os.getcwd()
if os.path.split(cwd)[1] != 'Scripts':
BASE_DIR = cwd
else:
BASE_DIR = os.path.split(os.getcwd())[0]
MODEL_DIR = os.path.join(BASE_DIR, 'Models')
DATA_DIR = os.path.join(BASE_DIR, 'Data')
pam_info_file = os.path.join(DATA_DIR, 'proteinAllocationModel_iML1515_EnzymaticData_py.xls')

# some other constants
TOTAL_PROTEIN_CONCENTRATION = 0.258 # [g_prot/g_cdw]

#setup the gem ecoli iML1515 model
model = cobra.io.read_sbml_model(os.path.join(MODEL_DIR, 'iML1515.xml'))

#check if a different total protein concentration is given
if isinstance(total_protein, float):
TOTAL_PROTEIN_CONCENTRATION = total_protein

# load example data for the E.coli iML1515 model
if active_enzymes:
# load active enzyme sector information
active_enzyme_info_old = pd.read_excel(pam_info_file, sheet_name='ActiveEnzymes')

# replace NaN values with unique identifiers
# select the NaN values
nan_values = active_enzyme_info_old['EC_nmbr'].isnull()
# make a list with unique ids
nan_ids = [f'E{i}' for i in range(nan_values.sum())]
# replace nan values by unique id
active_enzyme_info_old.loc[nan_values, 'EC_nmbr'] = nan_ids

# parse the enzyme information (kcat values, identifiers and molmasses)
kcats_dict = active_enzyme_info_old.set_index(keys='rxnID').loc[:, 'kcat'].to_dict()
ec_dict = active_enzyme_info_old.set_index(keys='rxnID').loc[:, 'EC_nmbr'].to_dict()
molmass_dict = mol_mass = active_enzyme_info_old.set_index(keys='rxnID').loc[:, 'molMass'].to_dict()

kcats = {}
# save fwd and bckw kcats separately in the form of: {rxn_id: {'f': kcat_f, 'b': kcat_b}}
for rxn, kcat in kcats_dict.items():
# reversible reaction
if rxn[-2:] == '_f' or rxn[-2:] == '_b':
direction = rxn[-1]
# check if the reaction already exists in the kcat dictionary
try:
kcats[rxn[:-2]][direction] = kcat
except:
kcats[rxn[:-2]] = {direction: kcat}
# irreversible reaction
else:
kcats[rxn] = {'f': kcat}

rxn2ec = {}
# parse the enzyme identifiers for the reactions
for rxn, ec in ec_dict.items():
if rxn[-2:] == '_f' or rxn[-2:] == '_b':
rxn = rxn[:-2]
for enz in str(ec).split(','):
rxn2ec[rxn] = enz.strip()

molmass = {}
# parse the enzyme molmasses for the reactions
for rxn, mw in molmass_dict.items():
if rxn[-2:] == '_f' or rxn[-2:] == '_b':
rxn = rxn[:-2]
molmass[rxn] = mw

rxn2protein = {}
for rxn, ec in rxn2ec.items():
ec_dict = {**kcats[rxn], **{'molmass': molmass[rxn]}}
# add enzyme to enzymes related to reaction if these are already stored
if rxn in rxn2protein.keys():
rxn2protein[rxn] = {**rxn2protein[rxn], **{ec: ec_dict}}
# if not create new reaction entry
else:
rxn2protein[rxn] = {ec: ec_dict}

active_enzyme_sector = ActiveEnzymeSector(rxn2protein=rxn2protein, configuration = config)

else:
active_enzyme_sector = None

if translational_enzymes:
translational_info = pd.read_excel(pam_info_file, sheet_name='Translational')
translation_enzyme_sector = TransEnzymeSector(
id_list=[translational_info[translational_info.Parameter == 'id_list'].loc[0, 'Value']],
tps_0=[translational_info[translational_info.Parameter == 'tps_0'].loc[1, 'Value']],
tps_mu=[-translational_info[translational_info.Parameter == 'tps_mu'].loc[2, 'Value']],
mol_mass=[translational_info[translational_info.Parameter == 'mol_mass'].loc[3, 'Value']],
configuration = config)
else:
translation_enzyme_sector = None

if unused_enzymes:
unused_protein_info = pd.read_excel(pam_info_file, sheet_name='ExcessEnzymes')

ups_0 = unused_protein_info[unused_protein_info.Parameter == 'ups_0'].loc[2, 'Value']
smax = unused_protein_info[unused_protein_info.Parameter == 's_max_uptake'].loc[1, 'Value']

unused_protein_sector = UnusedEnzymeSector(
id_list=[unused_protein_info[unused_protein_info.Parameter == 'id_list'].loc[0, 'Value']],
ups_mu=[ups_0 / smax],
ups_0=[ups_0],
mol_mass=[unused_protein_info[unused_protein_info.Parameter == 'mol_mass'].loc[3, 'Value']],
configuration = config)
else:
unused_protein_sector = None

if total_protein: total_protein = TOTAL_PROTEIN_CONCENTRATION

pamodel = PAModel(id_or_model=model, p_tot=total_protein,
active_sector=active_enzyme_sector, translational_sector=translation_enzyme_sector,
unused_sector=unused_protein_sector, sensitivity=sensitivity, configuration = config
)
return pamodel

def parse_coefficients(pamodel):
Ccsc = list()

for csc in ['flux_ub', 'flux_lb', 'enzyme_max', 'enzyme_min', 'proteome', 'sector']:
Ccsc += pamodel.capacity_sensitivity_coefficients[
pamodel.capacity_sensitivity_coefficients['constraint'] == csc].coefficient.to_list()

Cesc = pamodel.enzyme_sensitivity_coefficients.coefficient.to_list()

return Ccsc, Cesc

def parse_esc(pamodel):
return pamodel.enzyme_sensitivity_coefficients.coefficient.to_list()

12 changes: 6 additions & 6 deletions src/PAModelpy/CatalyticEvent.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def add_enzymes(self,enzyme_kcat_dict: dict):
})

def add_enzyme_reaction_association(self, enzyme):
catalytic_reaction = Reaction(self.id + "_" + enzyme.id)
catalytic_reaction = Reaction(self.id + "_" + enzyme.id, lower_bound=-1e3, upper_bound=1e3)
self._model.add_reactions([catalytic_reaction])
self.catalytic_reactions.append(catalytic_reaction)
self._model.constraints[self.id].set_linear_coefficients({
Expand Down Expand Up @@ -332,10 +332,10 @@ def remove_enzymes(self, enzyme_list: list):

def change_kcat_values(self, enzyme_kcat_dict : dict):
"""changes kcat values for the enzyme variable
Parameters
----------
Args:
enzyme_kcat_dict: nested Dict
A Dict with enzyme, kcat key, value pairs to connect the
A Dict with enzyme_id, kcat key, value pairs to connect the
enzyme with the associated reaction the kcat is another dict with 'f' and 'b'
for the forward and backward reactions respectively.
Expand All @@ -359,12 +359,12 @@ def change_kcat_values(self, enzyme_kcat_dict : dict):

enzyme_var = self._model.enzyme_variables.get_by_id(enzyme)
if enzyme_var not in self.enzyme_variables: self.enzyme_variables.add(enzyme_var)
ce_reaction = self._model.reactions.get_by_id(self.rxn_id+"_"+enzyme.id)
ce_reaction = self._model.reactions.get_by_id('CE_'+self.rxn_id+"_"+enzyme)
enzyme_var.change_kcat_values({ce_reaction: kcat_dict})
for direction, kcat in kcats_change.items():
#change enzyme variable
enzyme_var.kcats[ce_reaction.id][direction] = kcat
self._model._change_kcat_in_enzyme_constraint(ce_reaction, enzyme,direction, kcat)
self._model._change_kcat_in_enzyme_constraint(ce_reaction, enzyme, direction, kcat)

def __copy__(self) -> 'CatalyticEvent':
""" Copy the CatalyticEvent
Expand Down
2 changes: 1 addition & 1 deletion src/PAModelpy/Enzyme.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def get_kcat_values(self, rxn_ids: Union[str, list] = None) -> Dict:
warn("No reaction {0} found".format(rxn_id))
else:
# get kcat values
rxn2kcat[rxn_id] = self.rxn2kcat[rxn_id]
rxn2kcat[rxn_id] = self.rxn2kcat[catalytic_event_id+'_'+self.id]

if len(rxn_ids) == 1:
return rxn2kcat[rxn_id]
Expand Down
2 changes: 1 addition & 1 deletion tests/unit_tests/test_pamodel/test_pam_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


from Scripts.toy_ec_pam import build_toy_gem, build_active_enzyme_sector, build_translational_protein_sector, build_unused_protein_sector
from Scripts.pam_generation_new import (set_up_ecolicore_pam, set_up_ecoli_pam, set_up_toy_pam,
from Scripts.pam_generation import (set_up_ecolicore_pam, set_up_ecoli_pam, set_up_toy_pam,
parse_gpr_information_for_protein2genes,
parse_gpr_information_for_rxn2protein)

Expand Down
Loading

0 comments on commit 16d4d22

Please sign in to comment.