Changed gene-protein-reaction association parsing

iAMB-RWTH-Aachen · Jul 22, 2024 · 16d4d22 · 16d4d22
1 parent d81a39d
commit 16d4d22
Show file tree

Hide file tree

Showing 8 changed files with 636 additions and 651 deletions.
diff --git a/Scripts/__init__.py b/Scripts/__init__.py
diff --git a/Scripts/pam_generation.py b/Scripts/pam_generation.py
diff --git a/Scripts/pam_generation_new.py b/Scripts/pam_generation_new.py
diff --git a/Scripts/pam_generation_old.py b/Scripts/pam_generation_old.py
@@ -0,0 +1,330 @@
+import cobra
+import pandas as pd
+import os
+from typing import Union
+
+# load PAMpy modules
+from PAModelpy.PAModel import PAModel
+from PAModelpy.EnzymeSectors import ActiveEnzymeSector, UnusedEnzymeSector, TransEnzymeSector
+from PAModelpy.configuration import Config
+
+from Scripts.toy_ec_pam import build_toy_gem, build_active_enzyme_sector, build_translational_protein_sector, build_unused_protein_sector
+
+'Function library for making Protein Allocation Models as described in the publication'
+
+
+def set_up_toy_pam(sensitivity =True):
+    config = Config()
+    #setting the configuration for the toy model
+    config.BIOMASS_REACTION = 'R7'
+    config.GLUCOSE_EXCHANGE_RXNID = 'R1'
+    config.CO2_EXHANGE_RXNID = 'R8'
+    config.ACETATE_EXCRETION_RXNID = 'R9'
+
+    Etot = 0.6*1e-3
+    model = build_toy_gem()
+    active_enzyme = build_active_enzyme_sector(config)
+    unused_enzyme = build_unused_protein_sector(config)
+    translation_enzyme = build_translational_protein_sector(config)
+    pamodel = PAModel(model, name='toy model MCA with enzyme constraints', active_sector=active_enzyme,
+                      translational_sector=translation_enzyme,
+                      unused_sector=unused_enzyme, p_tot=Etot, sensitivity=sensitivity)
+    pamodel.objective = 'R7'
+    config.reset()
+    return pamodel
+
+def set_up_ecolicore_pam(total_protein:bool = True, active_enzymes: bool = True, translational_enzymes:bool = True, unused_enzymes:bool = True, sensitivity =True):
+    # Setting the relative paths
+    DATA_DIR = os.path.join(os.path.split(os.getcwd())[0], 'Data')
+    MODEL_DIR = os.path.join(os.path.split(os.getcwd())[0], 'Models')
+    PAM_DATA_FILE_PATH = os.path.join(DATA_DIR, 'proteinAllocationModel_iML1515_EnzymaticData_py.xls')
+
+    # some other constants
+    BIOMASS_REACTION = 'BIOMASS_Ecoli_core_w_GAM'
+    TOTAL_PROTEIN_CONCENTRATION = 0.16995  # [g_prot/g_cdw]
+
+    # load the genome-scale information
+    model = cobra.io.load_json_model(os.path.join(MODEL_DIR, 'e_coli_core.json'))
+
+    #load example data for the E.coli iML1515 model
+    if active_enzymes:
+        # load active enzyme sector information
+        enzyme_db = pd.read_excel(PAM_DATA_FILE_PATH, sheet_name='ActiveEnzymes')
+        enzyme_db = enzyme_db.set_index('rxnID')
+        # correct reaction IDS
+        for idx in enzyme_db.index.to_list():
+            # transprt reactions<
+
+            if 'pp' in idx:
+                idx_new = idx.replace('pp', '')
+                if idx_new not in enzyme_db.index:
+                    enzyme_db.rename(index={idx: idx_new}, inplace=True)
+            if 'ex' in idx:
+                idx_new = idx.replace('ex', '')
+                if idx_new not in enzyme_db.index:
+                    enzyme_db.rename(index={idx: idx_new}, inplace=True)
+
+                    # replace NaN values with unique identifiers
+        # replace NaN enzyme ids with a dummy enzyme identifier
+        # select the NaN values
+        nan_values = enzyme_db['EC_nmbr'].isnull()
+        # make a list with unique ids
+        nan_ids = [f'E{i}' for i in range(nan_values.sum())]
+        # replace nan values by unique id
+        enzyme_db.loc[nan_values, 'EC_nmbr'] = nan_ids
+
+        # create enzyme objects for each gene-associated reaction
+        kcats = {}
+        rxn2ec = {}
+        molmass = {}
+        for rxn in model.reactions:
+            if rxn.genes:
+                # correct transport reactions
+                if 't' in rxn.id:
+                    rxn.id = rxn.id
+                # are enzyme information in the PAM database?
+                rev = 0  # denotes reversibility
+                if rxn.lower_bound >= 0:
+                    # irreversible reaction (forward direction)
+                    rev = 0
+                    rxn_id = rxn.id  # save reaction ID for retrieveing molar masses/enzyme information later
+                    if rxn.id in enzyme_db.index:
+                        kcats[rxn.id] = {'f': enzyme_db.loc[rxn.id, 'kcat']}
+                elif rxn.upper_bound <= 0:
+                    # irreversible reaction (reverse direction)
+                    rev = 1
+                    rxn_id = rxn.id + '_b'
+                    if rxn_id in enzyme_db.index:
+                        kcats[rxn.id] = {'b': enzyme_db.loc[rxn_id, 'kcat']}
+                else:
+                    rev = 2
+                    # reversible reaction
+                    rxn_id_f = rxn.id + '_f'
+                    rxn_id_b = rxn.id + '_b'
+                    if rxn_id_f in enzyme_db.index and rxn_id_b in enzyme_db.index:
+                        rxn_id = rxn_id_f  # save reaction ID for retrieveing molar masses/enzyme information later
+                        kcats[rxn.id] = {'f': enzyme_db.loc[rxn_id_f, 'kcat'],
+                                         'b': enzyme_db.loc[rxn_id_b, 'kcat']}
+
+                    else:
+                        # try if only forward reaction is in database
+                        rxn_id = rxn.id  # save reaction ID for retrieveing molar masses/enzyme information later
+                        kcats[rxn.id] = {'f': enzyme_db.loc[rxn.id, 'kcat'],
+                                         'b': enzyme_db.loc[
+                                                  rxn.id, 'kcat'] / 2}  # deduce backwards kcat from forward value
+
+                # where enzyme information found?
+                if rxn.id in kcats.keys():
+                    # save molmass
+                    molmass[rxn.id] = enzyme_db.loc[rxn_id, 'molMass']
+                    # save enzyme information
+                    # is enzyme information NaN?
+                    if pd.isna(enzyme_db.loc[rxn_id, 'EC_nmbr']):
+                        rxn2ec[rxn.id] = ''
+                    else:
+                        rxn2ec[rxn.id] = enzyme_db.loc[rxn_id, 'EC_nmbr']
+
+
+                else:
+                    # no enzyme information found
+                    print('No enzyme information found for reaction: ' + rxn.id)
+                    # Create generic Enzyme with mean molar masses and kcat
+                    if rev == 0:
+                        kcats[rxn.id] = {'f': 22}
+                    elif rev == 1:
+                        kcats[rxn.id] = {'b': 22}
+                    else:
+                        kcats[rxn.id] = {'f': 22, 'b': 22}
+
+                    molmass[rxn.id] = 3.947778784340140e04
+
+        rxn2protein = {}
+        for rxn, ec in rxn2ec.items():
+            ec_dict = {**kcats[rxn], **{'molmass': molmass[rxn]}}
+            # add enzyme to enzymes related to reaction if these are already stored
+            if rxn in rxn2protein.keys():
+                rxn2protein[rxn] = {**rxn2protein[rxn], **{ec: ec_dict}}
+            # if not create new reaction entry
+            else:
+                rxn2protein[rxn] = {ec: ec_dict}
+
+        # create active enzymes sector
+        active_enzyme_sector = ActiveEnzymeSector(rxn2protein=rxn2protein)
+
+    else:
+        active_enzyme_sector = None
+
+    if translational_enzymes:
+        # translational protein sector parameter (substrate dependent)
+        id_list_tps = ['EX_glc__D_e']
+        tps_0 = [0.04992]  # g/gDW
+        tps_mu = [-0.002944]  # g h/gDW -> transformed to match glucose uptake variable
+        molmass_tps = [405903.94]  # g/mol
+
+        # translational protein sector
+        translation_enzyme_sector = TransEnzymeSector(
+            id_list=id_list_tps,
+            tps_0=tps_0,
+            tps_mu=tps_mu,
+            mol_mass=molmass_tps,
+        )
+    else:
+        translation_enzyme_sector = None
+
+    if unused_enzymes:
+        id_list_ups = [BIOMASS_REACTION]
+        ups_0 = [0.0407]  # g/gDW
+        ups_mu = [-0.0214]  # g h/gDW -> negative relation with growth rate
+        molmass_ups = [405903.94]  # g/mol
+
+        unused_enzyme_sector = UnusedEnzymeSector(
+            id_list=id_list_ups,
+            ups_0=ups_0,
+            ups_mu=ups_mu,
+            mol_mass=molmass_ups,
+        )
+    else:
+        unused_enzyme_sector = None
+
+    if total_protein: total_protein = TOTAL_PROTEIN_CONCENTRATION
+
+    pa_model = PAModel(id_or_model=model, p_tot=total_protein, sensitivity=sensitivity,
+                       active_sector=active_enzyme_sector, translational_sector=translation_enzyme_sector, unused_sector=unused_enzyme_sector)
+    return pa_model
+
+def set_up_ecoli_pam(total_protein: Union[bool, float] = True, active_enzymes: bool = True,
+                   translational_enzymes: bool = True, unused_enzymes: bool = True, sensitivity = True):
+
+    config = Config()
+    config.reset()
+    # Setting the relative paths
+    cwd = os.getcwd()
+    if os.path.split(cwd)[1] != 'Scripts':
+        BASE_DIR = cwd
+    else:
+        BASE_DIR = os.path.split(os.getcwd())[0]
+    MODEL_DIR = os.path.join(BASE_DIR, 'Models')
+    DATA_DIR = os.path.join(BASE_DIR, 'Data')
+    pam_info_file = os.path.join(DATA_DIR, 'proteinAllocationModel_iML1515_EnzymaticData_py.xls')
+
+    # some other constants
+    TOTAL_PROTEIN_CONCENTRATION = 0.258  # [g_prot/g_cdw]
+
+    #setup the gem ecoli iML1515 model
+    model = cobra.io.read_sbml_model(os.path.join(MODEL_DIR, 'iML1515.xml'))
+
+    #check if a different total protein concentration is given
+    if isinstance(total_protein, float):
+        TOTAL_PROTEIN_CONCENTRATION = total_protein
+
+    # load example data for the E.coli iML1515 model
+    if active_enzymes:
+        # load active enzyme sector information
+        active_enzyme_info_old = pd.read_excel(pam_info_file, sheet_name='ActiveEnzymes')
+
+        # replace NaN values with unique identifiers
+        # select the NaN values
+        nan_values = active_enzyme_info_old['EC_nmbr'].isnull()
+        # make a list with unique ids
+        nan_ids = [f'E{i}' for i in range(nan_values.sum())]
+        # replace nan values by unique id
+        active_enzyme_info_old.loc[nan_values, 'EC_nmbr'] = nan_ids
+
+        # parse the enzyme information (kcat values, identifiers and molmasses)
+        kcats_dict = active_enzyme_info_old.set_index(keys='rxnID').loc[:, 'kcat'].to_dict()
+        ec_dict = active_enzyme_info_old.set_index(keys='rxnID').loc[:, 'EC_nmbr'].to_dict()
+        molmass_dict = mol_mass = active_enzyme_info_old.set_index(keys='rxnID').loc[:, 'molMass'].to_dict()
+
+        kcats = {}
+        # save fwd and bckw kcats separately in the form of: {rxn_id: {'f': kcat_f, 'b': kcat_b}}
+        for rxn, kcat in kcats_dict.items():
+            # reversible reaction
+            if rxn[-2:] == '_f' or rxn[-2:] == '_b':
+                direction = rxn[-1]
+                # check if the reaction already exists in the kcat dictionary
+                try:
+                    kcats[rxn[:-2]][direction] = kcat
+                except:
+                    kcats[rxn[:-2]] = {direction: kcat}
+            # irreversible reaction
+            else:
+                kcats[rxn] = {'f': kcat}
+
+        rxn2ec = {}
+        # parse the enzyme identifiers for the reactions
+        for rxn, ec in ec_dict.items():
+            if rxn[-2:] == '_f' or rxn[-2:] == '_b':
+                rxn = rxn[:-2]
+            for enz in str(ec).split(','):
+                rxn2ec[rxn] = enz.strip()
+
+        molmass = {}
+        # parse the enzyme molmasses for the reactions
+        for rxn, mw in molmass_dict.items():
+            if rxn[-2:] == '_f' or rxn[-2:] == '_b':
+                rxn = rxn[:-2]
+            molmass[rxn] = mw
+
+        rxn2protein = {}
+        for rxn, ec in rxn2ec.items():
+            ec_dict = {**kcats[rxn], **{'molmass': molmass[rxn]}}
+            # add enzyme to enzymes related to reaction if these are already stored
+            if rxn in rxn2protein.keys():
+                rxn2protein[rxn] = {**rxn2protein[rxn], **{ec: ec_dict}}
+            # if not create new reaction entry
+            else:
+                rxn2protein[rxn] = {ec: ec_dict}
+
+        active_enzyme_sector = ActiveEnzymeSector(rxn2protein=rxn2protein, configuration = config)
+
+    else:
+        active_enzyme_sector = None
+
+    if translational_enzymes:
+        translational_info = pd.read_excel(pam_info_file, sheet_name='Translational')
+        translation_enzyme_sector = TransEnzymeSector(
+            id_list=[translational_info[translational_info.Parameter == 'id_list'].loc[0, 'Value']],
+            tps_0=[translational_info[translational_info.Parameter == 'tps_0'].loc[1, 'Value']],
+            tps_mu=[-translational_info[translational_info.Parameter == 'tps_mu'].loc[2, 'Value']],
+            mol_mass=[translational_info[translational_info.Parameter == 'mol_mass'].loc[3, 'Value']],
+            configuration = config)
+    else:
+        translation_enzyme_sector = None
+
+    if unused_enzymes:
+        unused_protein_info = pd.read_excel(pam_info_file, sheet_name='ExcessEnzymes')
+
+        ups_0 = unused_protein_info[unused_protein_info.Parameter == 'ups_0'].loc[2, 'Value']
+        smax = unused_protein_info[unused_protein_info.Parameter == 's_max_uptake'].loc[1, 'Value']
+
+        unused_protein_sector = UnusedEnzymeSector(
+            id_list=[unused_protein_info[unused_protein_info.Parameter == 'id_list'].loc[0, 'Value']],
+            ups_mu=[ups_0 / smax],
+            ups_0=[ups_0],
+            mol_mass=[unused_protein_info[unused_protein_info.Parameter == 'mol_mass'].loc[3, 'Value']],
+            configuration = config)
+    else:
+        unused_protein_sector = None
+
+    if total_protein: total_protein = TOTAL_PROTEIN_CONCENTRATION
+
+    pamodel = PAModel(id_or_model=model, p_tot=total_protein,
+                       active_sector=active_enzyme_sector, translational_sector=translation_enzyme_sector,
+                       unused_sector=unused_protein_sector, sensitivity=sensitivity, configuration = config
+                      )
+    return pamodel
+
+def parse_coefficients(pamodel):
+    Ccsc = list()
+
+    for csc in ['flux_ub', 'flux_lb', 'enzyme_max', 'enzyme_min', 'proteome', 'sector']:
+        Ccsc += pamodel.capacity_sensitivity_coefficients[
+            pamodel.capacity_sensitivity_coefficients['constraint'] == csc].coefficient.to_list()
+
+    Cesc = pamodel.enzyme_sensitivity_coefficients.coefficient.to_list()
+
+    return Ccsc, Cesc
+
+def parse_esc(pamodel):
+    return pamodel.enzyme_sensitivity_coefficients.coefficient.to_list()
+
diff --git a/src/PAModelpy/CatalyticEvent.py b/src/PAModelpy/CatalyticEvent.py
@@ -247,7 +247,7 @@ def add_enzymes(self,enzyme_kcat_dict: dict):
                         })
 
     def add_enzyme_reaction_association(self, enzyme):
-        catalytic_reaction = Reaction(self.id + "_" + enzyme.id)
+        catalytic_reaction = Reaction(self.id + "_" + enzyme.id, lower_bound=-1e3, upper_bound=1e3)
         self._model.add_reactions([catalytic_reaction])
         self.catalytic_reactions.append(catalytic_reaction)
         self._model.constraints[self.id].set_linear_coefficients({
@@ -332,10 +332,10 @@ def remove_enzymes(self, enzyme_list: list):
 
     def change_kcat_values(self, enzyme_kcat_dict : dict):
         """changes kcat values for the enzyme variable
-        Parameters
-        ----------
+
+        Args:
         enzyme_kcat_dict: nested Dict
-            A Dict with enzyme, kcat key, value pairs to connect the
+            A Dict with enzyme_id, kcat key, value pairs to connect the
             enzyme with the associated reaction the kcat is another dict with 'f' and 'b'
             for the forward and backward reactions respectively.
         
@@ -359,12 +359,12 @@ def change_kcat_values(self, enzyme_kcat_dict : dict):
 
             enzyme_var = self._model.enzyme_variables.get_by_id(enzyme)
             if enzyme_var not in self.enzyme_variables: self.enzyme_variables.add(enzyme_var)
-            ce_reaction = self._model.reactions.get_by_id(self.rxn_id+"_"+enzyme.id)
+            ce_reaction = self._model.reactions.get_by_id('CE_'+self.rxn_id+"_"+enzyme)
             enzyme_var.change_kcat_values({ce_reaction: kcat_dict})
             for direction, kcat in kcats_change.items():
                 #change enzyme variable
                 enzyme_var.kcats[ce_reaction.id][direction] = kcat
-                self._model._change_kcat_in_enzyme_constraint(ce_reaction, enzyme,direction, kcat)
+                self._model._change_kcat_in_enzyme_constraint(ce_reaction, enzyme, direction, kcat)
 
     def __copy__(self) -> 'CatalyticEvent':
         """ Copy the CatalyticEvent

diff --git a/src/PAModelpy/Enzyme.py b/src/PAModelpy/Enzyme.py
@@ -273,7 +273,7 @@ def get_kcat_values(self, rxn_ids: Union[str, list] = None) -> Dict:
                     warn("No reaction {0} found".format(rxn_id))
                 else:
                     # get kcat values
-                    rxn2kcat[rxn_id] = self.rxn2kcat[rxn_id]
+                    rxn2kcat[rxn_id] = self.rxn2kcat[catalytic_event_id+'_'+self.id]
 
         if len(rxn_ids) == 1:
             return rxn2kcat[rxn_id]

diff --git a/tests/unit_tests/test_pamodel/test_pam_generation.py b/tests/unit_tests/test_pamodel/test_pam_generation.py
@@ -4,7 +4,7 @@
 
 
 from Scripts.toy_ec_pam import build_toy_gem, build_active_enzyme_sector, build_translational_protein_sector, build_unused_protein_sector
-from Scripts.pam_generation_new import (set_up_ecolicore_pam, set_up_ecoli_pam, set_up_toy_pam,
+from Scripts.pam_generation import (set_up_ecolicore_pam, set_up_ecoli_pam, set_up_toy_pam,
                                         parse_gpr_information_for_protein2genes,
                                         parse_gpr_information_for_rxn2protein)