From 1c0c5aa076dbd706ff2a91d7aa69a5da144291d3 Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 8 Sep 2023 13:56:29 -0700 Subject: [PATCH 01/14] Update download base url --- indra/sources/ubibrowser/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indra/sources/ubibrowser/api.py b/indra/sources/ubibrowser/api.py index e2a8fea4e3..e9690a9a28 100644 --- a/indra/sources/ubibrowser/api.py +++ b/indra/sources/ubibrowser/api.py @@ -4,7 +4,7 @@ from .processor import UbiBrowserProcessor -DOWNLOAD_URL = 'http://ubibrowser.ncpsb.org.cn/v2/Public/download/literature/' +DOWNLOAD_URL = 'http://ubibrowser.bio-it.cn/ubibrowser_v3/Public/download/literature/' E3_URL = DOWNLOAD_URL + 'literature.E3.txt' DUB_URL = DOWNLOAD_URL + 'literature.DUB.txt' From 6b644a1a1eea8429ab7641198a81d90fe7527ceb Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Fri, 8 Sep 2023 18:05:26 -0400 Subject: [PATCH 02/14] Handle PubChem SIDs in SIGNOR --- indra/sources/signor/processor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/indra/sources/signor/processor.py b/indra/sources/signor/processor.py index 91fd1f33c2..5a163c6d3b 100644 --- a/indra/sources/signor/processor.py +++ b/indra/sources/signor/processor.py @@ -246,7 +246,11 @@ def _get_agent(self, ent_name, ent_type, id, database): # SIGNOR's format in which it leaves extra spaces around # the ID, as in 'CID: 923' id = id[4:].strip() - elif database == 'ChEBI' and id.startswith('SID:'): + # In older releases PubChem substance IDs were used with + # ChEBI as the source, these were later changed to use + # PUBCHEM + elif database in {'ChEBI', 'PUBCHEM'} \ + and id.startswith('SID:'): gnd_type = 'PUBCHEM.SUBSTANCE' id = id[4:].strip() db_refs = {gnd_type: id} From 1ea722c0887b10190baccbae4d9efabcee1be2b0 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sat, 9 Sep 2023 15:35:00 -0400 Subject: [PATCH 03/14] Update UbiBrowser column names --- indra/sources/ubibrowser/processor.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/indra/sources/ubibrowser/processor.py b/indra/sources/ubibrowser/processor.py index 0403fa8d2e..db5ecfb4ac 100644 --- a/indra/sources/ubibrowser/processor.py +++ b/indra/sources/ubibrowser/processor.py @@ -23,10 +23,12 @@ def _process_row(row, stmt_type): # is called "E3" # There are some examples where a complex is implied (e.g., BMI1-RNF2), # for simplicity we just ignore these - if '-' in row['E3AC']: + if '-' in row['SwissProt AC (E3)']: return None - subj_agent = get_standard_agent(row['E3GENE'], {'UP': row['E3AC']}) - obj_agent = get_standard_agent(row['SUBGENE'], {'UP': row['SUBAC']}) + subj_agent = get_standard_agent(row['Gene Symbol (E3)'], + {'UP': row['SwissProt AC (E3)']}) + obj_agent = get_standard_agent(row['Gene Symbol (Substrate)'], + {'UP': row['SwissProt AC (Substrate)']}) if row['SOURCE'] == 'MEDLINE' and row['SOURCEID'] != 'UNIPROT': # Note: we sometimes get int here pmid = str(row['SOURCEID']) From dfea3cd0e4b846470a8ae3a978b8cca0bed9a5b9 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sat, 9 Sep 2023 15:46:55 -0400 Subject: [PATCH 04/14] Adapt to more column changes and corner cases --- indra/sources/ubibrowser/processor.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/indra/sources/ubibrowser/processor.py b/indra/sources/ubibrowser/processor.py index db5ecfb4ac..68182fb763 100644 --- a/indra/sources/ubibrowser/processor.py +++ b/indra/sources/ubibrowser/processor.py @@ -10,23 +10,31 @@ def __init__(self, e3_df, dub_df): self.statements = [] def extract_statements(self): - for df, stmt_type in [(self.e3_df, Ubiquitination), - (self.dub_df, Deubiquitination)]: + for df, stmt_type, subj_suffix in \ + [(self.e3_df, Ubiquitination, 'E3'), + (self.dub_df, Deubiquitination, 'DUB')]: for _, row in df.iterrows(): - stmt = self._process_row(row, stmt_type) + stmt = self._process_row(row, stmt_type, subj_suffix) if stmt: self.statements.append(stmt) @staticmethod - def _process_row(row, stmt_type): + def _process_row(row, stmt_type, subj_suffix): # Note that even in the DUB table the subject of the statement # is called "E3" # There are some examples where a complex is implied (e.g., BMI1-RNF2), # for simplicity we just ignore these - if '-' in row['SwissProt AC (E3)']: + if '#' in row[f'SwissProt AC ({subj_suffix})']: return None - subj_agent = get_standard_agent(row['Gene Symbol (E3)'], - {'UP': row['SwissProt AC (E3)']}) + # Interestingly, some of the E3s are missing entirely, we skip these + elif row[f'SwissProt AC ({subj_suffix})'] == '-': + return None + # Some of the same corner cases apply to the substrate as well + if row['SwissProt AC (Substrate)'] == '-': + return None + subj_agent = \ + get_standard_agent(row[f'Gene Symbol ({subj_suffix})'], + {'UP': row[f'SwissProt AC ({subj_suffix})']}) obj_agent = get_standard_agent(row['Gene Symbol (Substrate)'], {'UP': row['SwissProt AC (Substrate)']}) if row['SOURCE'] == 'MEDLINE' and row['SOURCEID'] != 'UNIPROT': From f3b1787b1087cbabfd45fea08792b1246f79fe87 Mon Sep 17 00:00:00 2001 From: kkaris Date: Mon, 11 Sep 2023 15:46:34 -0700 Subject: [PATCH 05/14] Update version in sources.bel.api --- indra/sources/bel/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indra/sources/bel/api.py b/indra/sources/bel/api.py index fb1621c54d..646b1f14cc 100644 --- a/indra/sources/bel/api.py +++ b/indra/sources/bel/api.py @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) -version = 'v1.0.0' +version = 'v1.1.1' branch = 'https://github.com/cthoyt/selventa-knowledge/raw/' \ '{}/selventa_knowledge/{}' large_corpus_url = branch.format(version, 'large_corpus.bel.nodelink.json.gz') From a58d0a86106585c921071302c6141e67187b028b Mon Sep 17 00:00:00 2001 From: kkaris Date: Tue, 12 Sep 2023 16:26:58 -0700 Subject: [PATCH 06/14] Update version --- indra/sources/bel/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indra/sources/bel/api.py b/indra/sources/bel/api.py index 646b1f14cc..a0de304502 100644 --- a/indra/sources/bel/api.py +++ b/indra/sources/bel/api.py @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) -version = 'v1.1.1' +version = 'v1.1.2' branch = 'https://github.com/cthoyt/selventa-knowledge/raw/' \ '{}/selventa_knowledge/{}' large_corpus_url = branch.format(version, 'large_corpus.bel.nodelink.json.gz') From 34ca6443a320459afca90b952085473ef2031407 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Tue, 19 Sep 2023 23:08:00 -0400 Subject: [PATCH 07/14] Handle corner case for MeSH IDs --- indra/sources/rlimsp/processor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/indra/sources/rlimsp/processor.py b/indra/sources/rlimsp/processor.py index d17df1ef24..7587354c8b 100644 --- a/indra/sources/rlimsp/processor.py +++ b/indra/sources/rlimsp/processor.py @@ -251,7 +251,10 @@ def get_agent_from_entity_info(entity_info): refs['CHEBI'] = 'CHEBI:%s' % id_dict['idString'] # These we take as is elif id_dict['source'] in ('MESH', 'OMIM'): - refs[id_dict['source']] = id_dict['idString'] + if ';' in id_dict['idString']: + refs[id_dict['source']] = id_dict['idString'].split(';')[0] + else: + refs[id_dict['source']] = id_dict['idString'] # CTD is sometimes used for MESH chemical IDs but can also be just '-' elif id_dict['source'] == 'CTD': if id_dict['idString'] != '-': From 66ba51e2177f5fcdb5e7ea6e8e8b37ba17f2d24a Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 15 Sep 2023 08:31:00 -0700 Subject: [PATCH 08/14] Update to https for CTD url --- indra/sources/ctd/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indra/sources/ctd/api.py b/indra/sources/ctd/api.py index 2ee8055851..a46d07f62c 100644 --- a/indra/sources/ctd/api.py +++ b/indra/sources/ctd/api.py @@ -2,7 +2,7 @@ from .processor import CTDProcessor, CTDChemicalDiseaseProcessor, \ CTDGeneDiseaseProcessor, CTDChemicalGeneProcessor -base_url = 'http://ctdbase.org/reports/' +base_url = 'https://ctdbase.org/reports/' urls = { 'chemical_gene': base_url + 'CTD_chem_gene_ixns.tsv.gz', From 255c44ac67b0a42c37c43f51121e2d0425eccacc Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 22 Sep 2023 08:22:22 -0700 Subject: [PATCH 09/14] Replace gilda service with local gilda --- indra/sources/phosphoelm/processor.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/indra/sources/phosphoelm/processor.py b/indra/sources/phosphoelm/processor.py index 9fb7e0b572..2f7f24ef60 100644 --- a/indra/sources/phosphoelm/processor.py +++ b/indra/sources/phosphoelm/processor.py @@ -1,13 +1,13 @@ import logging import requests +import gilda from indra.databases import uniprot_client, hgnc_client from indra.statements.validate import validate_text_refs from indra.statements import Phosphorylation, Evidence, Agent from .phosphoelm_mapping import phosphoelm_mapping -gilda_url = 'http://grounding.indra.bio/ground' logger = logging.getLogger(__name__) @@ -156,17 +156,12 @@ def _agent_from_str(txt): def _gilda_grounder(txt): # Pre-process text for grounding - txt = txt.replace('_group', '') - txt = txt.replace('_', '-') - txt = txt.split('/')[0] - res = requests.post(gilda_url, json={'text': txt}) - if res.status_code != 200: - logger.warning('Gilda service responded with status code %d' % - res.status_code) + txt = txt.replace("_group", "") + txt = txt.replace("_", "-") + txt = txt.split("/")[0] + res = gilda.ground(txt) + if not res: + logger.warning(f"Gilda grounder returned no results for {txt}") return None - rj = res.json() - if not rj: - return None - top_term = rj[0]['term'] - return top_term - + top_term = res[0].term + return top_term.to_json() From 89d560a92ffcf2cad7a9c43710955bc633caaec2 Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 22 Sep 2023 08:22:38 -0700 Subject: [PATCH 10/14] Add tqdm to processing --- indra/sources/phosphoelm/processor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/indra/sources/phosphoelm/processor.py b/indra/sources/phosphoelm/processor.py index 2f7f24ef60..4b8b9aad91 100644 --- a/indra/sources/phosphoelm/processor.py +++ b/indra/sources/phosphoelm/processor.py @@ -1,5 +1,5 @@ import logging -import requests +from tqdm import tqdm import gilda from indra.databases import uniprot_client, hgnc_client @@ -40,7 +40,8 @@ def process_phosphorylations(self, skip_empty=True): Default: True. If False, also create statements when upstream kinases in entry['kinases'] are not known. """ - for entry in self._phosphoelm_data: + logger.info("Processing Phospho.ELM phosphorylations") + for entry in tqdm(self._phosphoelm_data): if entry['species'].lower() != 'homo sapiens' or\ skip_empty and not entry['kinases']: # Skip entries without any kinases or if species is other From 46ba31f56d30d691e5c5be0aca1ab45f15358f54 Mon Sep 17 00:00:00 2001 From: kkaris Date: Fri, 22 Sep 2023 08:23:53 -0700 Subject: [PATCH 11/14] Add tqdm to file processing in phosphoelm --- indra/sources/phosphoelm/api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/indra/sources/phosphoelm/api.py b/indra/sources/phosphoelm/api.py index b8cc7bd03c..6b542f19a4 100644 --- a/indra/sources/phosphoelm/api.py +++ b/indra/sources/phosphoelm/api.py @@ -1,6 +1,8 @@ import csv import logging +from tqdm import tqdm + from .processor import PhosphoElmProcessor logger = logging.getLogger(__name__) @@ -36,7 +38,8 @@ def _get_json_from_entry_rows(row_iter): """Loop body to generate a json friendly structure""" ppelm_json = [] columns = next(row_iter) - for entry in row_iter: + logger.info('Processing Phospho.ELM dump') + for entry in tqdm(row_iter): row_dict = {c: e for c, e in zip(columns, entry)} ppelm_json.append(row_dict) return ppelm_json From 199064e964fa5c43ffdfdc0aa14e9860602418c9 Mon Sep 17 00:00:00 2001 From: kkaris Date: Tue, 26 Sep 2023 14:38:00 -0700 Subject: [PATCH 12/14] Handle missing selventa mappings --- indra/sources/bel/processor.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/indra/sources/bel/processor.py b/indra/sources/bel/processor.py index 5a485d6984..49eed505eb 100644 --- a/indra/sources/bel/processor.py +++ b/indra/sources/bel/processor.py @@ -547,9 +547,14 @@ def get_db_refs_by_name(ns, name, node_data): db_refs = {'UP': up_id} # Map Selventa families and complexes to FamPlex elif ns == 'SFAM': - sfam_id, xrefs = selventa_lookup[('SFAM', name)] - db_refs = {'SFAM': sfam_id} - indra_name = bel_to_indra.get(name) + try: + sfam_id, xrefs = selventa_lookup[('SFAM', name)] + db_refs = {"SFAM": sfam_id} + indra_name = bel_to_indra.get(name) + except KeyError: + indra_name = None + db_refs = None + if indra_name is None: logger.info('Could not find mapping for BEL/SFAM family: ' '%s (%s)' % (name, node_data)) @@ -614,9 +619,15 @@ def get_db_refs_by_name(ns, name, node_data): name = chebi_client.get_chebi_name_from_id(chebi_id) # SDIS, SCHEM: Look up the ID and include it in the db_refs elif ns in {'SDIS', 'SCHEM'}: - sid, xrefs = selventa_lookup[(ns, name)] - db_refs = xrefs.copy() - db_refs[ns] = sid + try: + sid, xrefs = selventa_lookup[(ns, name)] + db_refs = xrefs.copy() + db_refs[ns] = sid + except KeyError: + logger.info( + f"Could not map Selventa name {name} to ID for {ns}." + ) + return name, None elif ns == 'TEXT': db_refs = {ns: name} elif ns == 'TAX': From 9c396481d8d7c2bc777f9bd7100685adf0dcefd8 Mon Sep 17 00:00:00 2001 From: kkaris Date: Wed, 27 Sep 2023 11:27:22 -0700 Subject: [PATCH 13/14] Add new ubibrowser test files --- .../tests/test_sources/resources/ubibrowser_dub.txt | 10 +++++----- indra/tests/test_sources/resources/ubibrowser_e3.txt | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/indra/tests/test_sources/resources/ubibrowser_dub.txt b/indra/tests/test_sources/resources/ubibrowser_dub.txt index ab5d69977d..3d290f03b0 100644 --- a/indra/tests/test_sources/resources/ubibrowser_dub.txt +++ b/indra/tests/test_sources/resources/ubibrowser_dub.txt @@ -1,5 +1,5 @@ -NUMBER E3ID SUBID E3AC SUBAC E3GENE SUBGENE SOURCE SOURCEID SENTENCE E3TYPE COUNT type species -671 UBP10_HUMAN PTEN_HUMAN Q14694 P60484 USP10 PTEN MEDLINE 28852924 We further demonstrated that USP10 directly interacted with and stabilized PTEN via deubiquitination. USP 1 DUB H.sapiens -673 UBP36_HUMAN SODM_HUMAN Q9P275 P04179 USP36 SOD2 MEDLINE 21268071 we identified a deubiquitinating enzyme USP36 that regulates the protein stability of SOD2 USP 1 DUB H.sapiens -675 UBP13_HUMAN UBL4A_HUMAN Q92995 P11441 USP13 UBL4A MEDLINE 24424410 we identify USP13 as a gp78-associated DUB that eliminates ubiquitin conjugates from Ubl4A to maintain the functionality of Bag6. USP 2 DUB H.sapiens -677 UBP33_HUMAN ARRB1_HUMAN Q8TEY7 P49407 USP33 ARRB1 MEDLINE 19363159 We now report the discovery that the deubiquitinating enzyme ubiquitin-specific protease 33 (USP33) binds beta-arrestin2 and leads to the deubiquitination of beta-arrestins. USP 1 DUB H.sapiens +NUMBER SwissProt ID (DUB) SwissProt ID (Substrate) SwissProt AC (DUB) SwissProt AC (Substrate) Gene Symbol (DUB) Gene Symbol (Substrate) SOURCE SOURCEID SENTENCE DUBTYPE COUNT type species +56 UBP33_HUMAN ARRB1_HUMAN Q8TEY7 P49407 USP33 ARRB1 MEDLINE 19363159 "We now report the discovery that the deubiquitinating enzyme ubiquitin-specific protease 33 (USP33) binds beta-arrestin2 and leads to the deubiquitination of beta-arrestins" USP 1 Training data H.sapiens +129 UBP13_HUMAN UBL4A_HUMAN Q92995 P11441 USP13 UBL4A MEDLINE 24424410 "we identify USP13 as a gp78-associated DUB that eliminates ubiquitin conjugates from Ubl4A to maintain the functionality of Bag6" USP 1 Other H.sapiens +388 UBP10_HUMAN PTEN_HUMAN Q14694 P60484 USP10 PTEN MEDLINE 28852924 "We further demonstrated that USP10 directly interacted with and stabilized PTEN via deubiquitination" USP 1 Training data H.sapiens +726 UBP36_HUMAN SODM_HUMAN Q9P275 P04179 USP36 SOD2 MEDLINE 21268071 "we identified a deubiquitinating enzyme USP36 that regulates the protein stability of SOD2" USP 1 Other H.sapiens diff --git a/indra/tests/test_sources/resources/ubibrowser_e3.txt b/indra/tests/test_sources/resources/ubibrowser_e3.txt index 22a1e52e21..95cd044892 100644 --- a/indra/tests/test_sources/resources/ubibrowser_e3.txt +++ b/indra/tests/test_sources/resources/ubibrowser_e3.txt @@ -1,6 +1,6 @@ -NUMBER E3ID SUBID E3AC SUBAC E3GENE SUBGENE SOURCE SOURCEID SENTENCE E3TYPE COUNT type species -1 AMFR2_HUMAN A1AT_HUMAN Q9UKV5 P01009 AMFR SERPINA1 MEDLINE 16979136 Here we report that gp78, a ubiquitin ligase (E3) pairing with mammalian Ubc7 for ERAD, ubiquitinates and facilitates degradation of ATZ, the classic deficiency variant of AAT having a Z mutation (Glu 342 Lys) RING 3 E3 H.sapiens -15 AMFR2_HUMAN HMDH_HUMAN Q9UKV5 P04035 AMFR HMGCR MEDLINE 20458442 UBE2G2, a previously known E2 of gp78, is demonstrated to be involved in the sterol-regulated ubiquitination and degradation of HMGCR RING 4 E3 H.sapiens -300 BRCA1_HUMAN BRCA1_HUMAN P38398 P38398 BRCA1 BRCA1 UniProt RNF8_BOVIN Following DNA double-strand breaks (DSBs), it is recruited to the sites of damage by ATM-phosphorylated MDC1, mediates the ubiquitination of histones H2A and H2AX, thereby promoting the formation of TP53BP1 and BRCA1 ionizing radiation-induced foci (IRIF) RING 21 E3 H.sapiens -5642 HRD1_CAEEL Q9BMU4_CAEEL Q20798 Q9BMU4 sel-11 atln-1 MEDLINE 32916628 UbiNet 2.0 RING 1 E3 C.elegans -5644 A0A2I4KBP1_DANRE SHH_DANRE A0A2I4KBP1 Q92008 gan shha MEDLINE 31503551 UbiNet 2.0 other 1 E3 D.rerio +NUMBER SwissProt ID (E3) SwissProt ID (Substrate) SwissProt AC (E3) SwissProt AC (Substrate) Gene Symbol (E3) Gene Symbol (Substrate) SOURCE SOURCEID SENTENCE E3TYPE COUNT type species +109 HRD1_CAEEL Q9BMU4_CAEEL Q20798 Q9BMU4 sel-11 atln-1 MEDLINE 32916628 32916628 RING 1 Other C.elegans +167 A0A2I4KBP1_DANRE SHH_DANRE A0A2I4KBP1 Q92008 gan shha MEDLINE 31503551 31503551 Other 1 Other D.rerio +198 AMFR2_HUMAN A1AT_HUMAN Q9UKV5 P01009 AMFR SERPINA1 MEDLINE 16979136 "Here we report that gp78, a ubiquitin ligase (E3) pairing with mammalian Ubc7 for ERAD, ubiquitinates and facilitates degradation of ATZ, the classic deficiency variant of AAT having a Z mutation (Glu 342 Lys)" RING 1 Training data H.sapiens +1040 GAN_HUMAN SHH_HUMAN Q9H2C0 Q15465 GAN SHH MEDLINE 31503551 31503551 BTB_3 1 Other H.sapiens +2631 SYVN1_HUMAN ATLA1_HUMAN Q86TM6 Q8WXF7 SYVN1 ATL1 MEDLINE 32916628 "The E3 Ubiquitin Ligase SYVN1 Ubiquitinates Atlastins to Remodel the Endoplasmic Reticulum Network." RING 1 Other H.sapiens From 5360deac23749024ada36d0f9c69df7c480a9d82 Mon Sep 17 00:00:00 2001 From: kkaris Date: Wed, 27 Sep 2023 11:29:10 -0700 Subject: [PATCH 14/14] Update ubibrowser test --- indra/tests/test_sources/test_ubibrowser.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/indra/tests/test_sources/test_ubibrowser.py b/indra/tests/test_sources/test_ubibrowser.py index fefe074b64..fb4c040853 100644 --- a/indra/tests/test_sources/test_ubibrowser.py +++ b/indra/tests/test_sources/test_ubibrowser.py @@ -13,14 +13,15 @@ def test_extract_statements(): up = ubibrowser.process_file(e3_file, dub_file) assert len(up.statements) == 9 - assert isinstance(up.statements[0], Ubiquitination) + assert isinstance(up.statements[2], Ubiquitination) assert isinstance(up.statements[-1], Deubiquitination) assert_valid_statements(up.statements) - #1 AMFR2_HUMAN A1AT_HUMAN Q9UKV5 P01009 AMFR SERPINA1 - # MEDLINE 16979136 Here we report that ... RING 3 E3 H.sapiens - e3_stmt = up.statements[0] + #198 AMFR2_HUMAN A1AT_HUMAN Q9UKV5 P01009 AMFR SERPINA1 + # MEDLINE 16979136 "Here we report that ..." RING 1 + # "Training data" H.sapiens + e3_stmt = up.statements[2] assert e3_stmt.enz.name == 'AMFR' assert e3_stmt.enz.db_refs['UP'] == 'Q9UKV5' assert e3_stmt.sub.name == 'SERPINA1' @@ -30,9 +31,10 @@ def test_extract_statements(): assert e3_stmt.evidence[0].pmid == '16979136' assert e3_stmt.evidence[0].text.startswith('Here we report that') - # 677 UBP33_HUMAN ARRB1_HUMAN Q8TEY7 P49407 USP33 ARRB1 - # MEDLINE 19363159 We now report the discovery that... USP 1 DUB H.sapiens - dub_stmt = up.statements[-1] + # 56 UBP33_HUMAN ARRB1_HUMAN Q8TEY7 P49407 USP33 ARRB1 MEDLINE + # 19363159 "We now report the discovery that " "USP 1" "Training data" + # H.sapiens + dub_stmt = up.statements[5] assert dub_stmt.enz.name == 'USP33' assert dub_stmt.enz.db_refs['UP'] == 'Q8TEY7' assert dub_stmt.sub.name == 'ARRB1'