From 1c0c5aa076dbd706ff2a91d7aa69a5da144291d3 Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Fri, 8 Sep 2023 13:56:29 -0700
Subject: [PATCH 01/14] Update download base url

---
 indra/sources/ubibrowser/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/indra/sources/ubibrowser/api.py b/indra/sources/ubibrowser/api.py
index e2a8fea4e3..e9690a9a28 100644
--- a/indra/sources/ubibrowser/api.py
+++ b/indra/sources/ubibrowser/api.py
@@ -4,7 +4,7 @@
 from .processor import UbiBrowserProcessor
 
 
-DOWNLOAD_URL = 'http://ubibrowser.ncpsb.org.cn/v2/Public/download/literature/'
+DOWNLOAD_URL = 'http://ubibrowser.bio-it.cn/ubibrowser_v3/Public/download/literature/'
 E3_URL = DOWNLOAD_URL + 'literature.E3.txt'
 DUB_URL = DOWNLOAD_URL + 'literature.DUB.txt'
 

From 6b644a1a1eea8429ab7641198a81d90fe7527ceb Mon Sep 17 00:00:00 2001
From: Ben Gyori <ben.gyori@gmail.com>
Date: Fri, 8 Sep 2023 18:05:26 -0400
Subject: [PATCH 02/14] Handle PubChem SIDs in SIGNOR

---
 indra/sources/signor/processor.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/indra/sources/signor/processor.py b/indra/sources/signor/processor.py
index 91fd1f33c2..5a163c6d3b 100644
--- a/indra/sources/signor/processor.py
+++ b/indra/sources/signor/processor.py
@@ -246,7 +246,11 @@ def _get_agent(self, ent_name, ent_type, id, database):
                     # SIGNOR's format in which it leaves extra spaces around
                     # the ID, as in 'CID: 923'
                     id = id[4:].strip()
-                elif database == 'ChEBI' and id.startswith('SID:'):
+                # In older releases, PubChem substance IDs were used with
+                # ChEBI as the source; these were later changed to use
+                # PUBCHEM as the source
+                elif database in {'ChEBI', 'PUBCHEM'} \
+                        and id.startswith('SID:'):
                     gnd_type = 'PUBCHEM.SUBSTANCE'
                     id = id[4:].strip()
                 db_refs = {gnd_type: id}

From 1ea722c0887b10190baccbae4d9efabcee1be2b0 Mon Sep 17 00:00:00 2001
From: Ben Gyori <ben.gyori@gmail.com>
Date: Sat, 9 Sep 2023 15:35:00 -0400
Subject: [PATCH 03/14] Update UbiBrowser column names

---
 indra/sources/ubibrowser/processor.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/indra/sources/ubibrowser/processor.py b/indra/sources/ubibrowser/processor.py
index 0403fa8d2e..db5ecfb4ac 100644
--- a/indra/sources/ubibrowser/processor.py
+++ b/indra/sources/ubibrowser/processor.py
@@ -23,10 +23,12 @@ def _process_row(row, stmt_type):
         # is called "E3"
         # There are some examples where a complex is implied (e.g., BMI1-RNF2),
         # for simplicity we just ignore these
-        if '-' in row['E3AC']:
+        if '-' in row['SwissProt AC (E3)']:
             return None
-        subj_agent = get_standard_agent(row['E3GENE'], {'UP': row['E3AC']})
-        obj_agent = get_standard_agent(row['SUBGENE'], {'UP': row['SUBAC']})
+        subj_agent = get_standard_agent(row['Gene Symbol (E3)'],
+                                        {'UP': row['SwissProt AC (E3)']})
+        obj_agent = get_standard_agent(row['Gene Symbol (Substrate)'],
+                                       {'UP': row['SwissProt AC (Substrate)']})
         if row['SOURCE'] == 'MEDLINE' and row['SOURCEID'] != 'UNIPROT':
             # Note: we sometimes get int here
             pmid = str(row['SOURCEID'])

From dfea3cd0e4b846470a8ae3a978b8cca0bed9a5b9 Mon Sep 17 00:00:00 2001
From: Ben Gyori <ben.gyori@gmail.com>
Date: Sat, 9 Sep 2023 15:46:55 -0400
Subject: [PATCH 04/14] Adapt to more column changes and corner cases

---
 indra/sources/ubibrowser/processor.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/indra/sources/ubibrowser/processor.py b/indra/sources/ubibrowser/processor.py
index db5ecfb4ac..68182fb763 100644
--- a/indra/sources/ubibrowser/processor.py
+++ b/indra/sources/ubibrowser/processor.py
@@ -10,23 +10,31 @@ def __init__(self, e3_df, dub_df):
         self.statements = []
 
     def extract_statements(self):
-        for df, stmt_type in [(self.e3_df, Ubiquitination),
-                              (self.dub_df, Deubiquitination)]:
+        for df, stmt_type, subj_suffix in \
+                [(self.e3_df, Ubiquitination, 'E3'),
+                 (self.dub_df, Deubiquitination, 'DUB')]:
             for _, row in df.iterrows():
-                stmt = self._process_row(row, stmt_type)
+                stmt = self._process_row(row, stmt_type, subj_suffix)
                 if stmt:
                     self.statements.append(stmt)
 
     @staticmethod
-    def _process_row(row, stmt_type):
+    def _process_row(row, stmt_type, subj_suffix):
         # Note that even in the DUB table the subject of the statement
         # is called "E3"
         # There are some examples where a complex is implied (e.g., BMI1-RNF2),
         # for simplicity we just ignore these
-        if '-' in row['SwissProt AC (E3)']:
+        if '#' in row[f'SwissProt AC ({subj_suffix})']:
             return None
-        subj_agent = get_standard_agent(row['Gene Symbol (E3)'],
-                                        {'UP': row['SwissProt AC (E3)']})
+        # Some subject (E3/DUB) accessions are missing entirely; skip these
+        elif row[f'SwissProt AC ({subj_suffix})'] == '-':
+            return None
+        # Some of the same corner cases apply to the substrate as well
+        if row['SwissProt AC (Substrate)'] == '-':
+            return None
+        subj_agent = \
+            get_standard_agent(row[f'Gene Symbol ({subj_suffix})'],
+                               {'UP': row[f'SwissProt AC ({subj_suffix})']})
         obj_agent = get_standard_agent(row['Gene Symbol (Substrate)'],
                                        {'UP': row['SwissProt AC (Substrate)']})
         if row['SOURCE'] == 'MEDLINE' and row['SOURCEID'] != 'UNIPROT':

From f3b1787b1087cbabfd45fea08792b1246f79fe87 Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Mon, 11 Sep 2023 15:46:34 -0700
Subject: [PATCH 05/14] Update version in sources.bel.api

---
 indra/sources/bel/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/indra/sources/bel/api.py b/indra/sources/bel/api.py
index fb1621c54d..646b1f14cc 100644
--- a/indra/sources/bel/api.py
+++ b/indra/sources/bel/api.py
@@ -17,7 +17,7 @@
 
 logger = logging.getLogger(__name__)
 
-version = 'v1.0.0'
+version = 'v1.1.1'
 branch = 'https://github.com/cthoyt/selventa-knowledge/raw/' \
          '{}/selventa_knowledge/{}'
 large_corpus_url = branch.format(version, 'large_corpus.bel.nodelink.json.gz')

From a58d0a86106585c921071302c6141e67187b028b Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Tue, 12 Sep 2023 16:26:58 -0700
Subject: [PATCH 06/14] Update Selventa knowledge version to v1.1.2

---
 indra/sources/bel/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/indra/sources/bel/api.py b/indra/sources/bel/api.py
index 646b1f14cc..a0de304502 100644
--- a/indra/sources/bel/api.py
+++ b/indra/sources/bel/api.py
@@ -17,7 +17,7 @@
 
 logger = logging.getLogger(__name__)
 
-version = 'v1.1.1'
+version = 'v1.1.2'
 branch = 'https://github.com/cthoyt/selventa-knowledge/raw/' \
          '{}/selventa_knowledge/{}'
 large_corpus_url = branch.format(version, 'large_corpus.bel.nodelink.json.gz')

From 34ca6443a320459afca90b952085473ef2031407 Mon Sep 17 00:00:00 2001
From: Ben Gyori <ben.gyori@gmail.com>
Date: Tue, 19 Sep 2023 23:08:00 -0400
Subject: [PATCH 07/14] Handle corner case for MeSH IDs

---
 indra/sources/rlimsp/processor.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/indra/sources/rlimsp/processor.py b/indra/sources/rlimsp/processor.py
index d17df1ef24..7587354c8b 100644
--- a/indra/sources/rlimsp/processor.py
+++ b/indra/sources/rlimsp/processor.py
@@ -251,7 +251,10 @@ def get_agent_from_entity_info(entity_info):
             refs['CHEBI'] = 'CHEBI:%s' % id_dict['idString']
         # These we take as is
         elif id_dict['source'] in ('MESH', 'OMIM'):
-            refs[id_dict['source']] = id_dict['idString']
+            if ';' in id_dict['idString']:
+                refs[id_dict['source']] = id_dict['idString'].split(';')[0]
+            else:
+                refs[id_dict['source']] = id_dict['idString']
         # CTD is sometimes used for MESH chemical IDs but can also be just '-'
         elif id_dict['source'] == 'CTD':
             if id_dict['idString'] != '-':

From 66ba51e2177f5fcdb5e7ea6e8e8b37ba17f2d24a Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Fri, 15 Sep 2023 08:31:00 -0700
Subject: [PATCH 08/14] Update to https for CTD url

---
 indra/sources/ctd/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/indra/sources/ctd/api.py b/indra/sources/ctd/api.py
index 2ee8055851..a46d07f62c 100644
--- a/indra/sources/ctd/api.py
+++ b/indra/sources/ctd/api.py
@@ -2,7 +2,7 @@
 from .processor import CTDProcessor, CTDChemicalDiseaseProcessor, \
     CTDGeneDiseaseProcessor, CTDChemicalGeneProcessor
 
-base_url = 'http://ctdbase.org/reports/'
+base_url = 'https://ctdbase.org/reports/'
 
 urls = {
     'chemical_gene': base_url + 'CTD_chem_gene_ixns.tsv.gz',

From 255c44ac67b0a42c37c43f51121e2d0425eccacc Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Fri, 22 Sep 2023 08:22:22 -0700
Subject: [PATCH 09/14] Replace gilda service with local gilda

---
 indra/sources/phosphoelm/processor.py | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/indra/sources/phosphoelm/processor.py b/indra/sources/phosphoelm/processor.py
index 9fb7e0b572..2f7f24ef60 100644
--- a/indra/sources/phosphoelm/processor.py
+++ b/indra/sources/phosphoelm/processor.py
@@ -1,13 +1,13 @@
 import logging
 import requests
 
+import gilda
 from indra.databases import uniprot_client, hgnc_client
 from indra.statements.validate import validate_text_refs
 from indra.statements import Phosphorylation, Evidence, Agent
 
 from .phosphoelm_mapping import phosphoelm_mapping
 
-gilda_url = 'http://grounding.indra.bio/ground'
 logger = logging.getLogger(__name__)
 
 
@@ -156,17 +156,12 @@ def _agent_from_str(txt):
 
 def _gilda_grounder(txt):
     # Pre-process text for grounding
-    txt = txt.replace('_group', '')
-    txt = txt.replace('_', '-')
-    txt = txt.split('/')[0]
-    res = requests.post(gilda_url, json={'text': txt})
-    if res.status_code != 200:
-        logger.warning('Gilda service responded with status code %d' %
-                       res.status_code)
+    txt = txt.replace("_group", "")
+    txt = txt.replace("_", "-")
+    txt = txt.split("/")[0]
+    res = gilda.ground(txt)
+    if not res:
+        logger.warning(f"Gilda grounder returned no results for {txt}")
         return None
-    rj = res.json()
-    if not rj:
-        return None
-    top_term = rj[0]['term']
-    return top_term
-
+    top_term = res[0].term
+    return top_term.to_json()

From 89d560a92ffcf2cad7a9c43710955bc633caaec2 Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Fri, 22 Sep 2023 08:22:38 -0700
Subject: [PATCH 10/14] Add tqdm to processing

---
 indra/sources/phosphoelm/processor.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/indra/sources/phosphoelm/processor.py b/indra/sources/phosphoelm/processor.py
index 2f7f24ef60..4b8b9aad91 100644
--- a/indra/sources/phosphoelm/processor.py
+++ b/indra/sources/phosphoelm/processor.py
@@ -1,5 +1,5 @@
 import logging
-import requests
+from tqdm import tqdm
 
 import gilda
 from indra.databases import uniprot_client, hgnc_client
@@ -40,7 +40,8 @@ def process_phosphorylations(self, skip_empty=True):
             Default: True. If False, also create statements when upstream
             kinases in entry['kinases'] are not known.
         """
-        for entry in self._phosphoelm_data:
+        logger.info("Processing Phospho.ELM phosphorylations")
+        for entry in tqdm(self._phosphoelm_data):
             if entry['species'].lower() != 'homo sapiens' or\
                     skip_empty and not entry['kinases']:
                 # Skip entries without any kinases or if species is other

From 46ba31f56d30d691e5c5be0aca1ab45f15358f54 Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Fri, 22 Sep 2023 08:23:53 -0700
Subject: [PATCH 11/14] Add tqdm to file processing in phosphoelm

---
 indra/sources/phosphoelm/api.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/indra/sources/phosphoelm/api.py b/indra/sources/phosphoelm/api.py
index b8cc7bd03c..6b542f19a4 100644
--- a/indra/sources/phosphoelm/api.py
+++ b/indra/sources/phosphoelm/api.py
@@ -1,6 +1,8 @@
 import csv
 import logging
 
+from tqdm import tqdm
+
 from .processor import PhosphoElmProcessor
 
 logger = logging.getLogger(__name__)
@@ -36,7 +38,8 @@ def _get_json_from_entry_rows(row_iter):
     """Loop body to generate a json friendly structure"""
     ppelm_json = []
     columns = next(row_iter)
-    for entry in row_iter:
+    logger.info('Processing Phospho.ELM dump')
+    for entry in tqdm(row_iter):
         row_dict = {c: e for c, e in zip(columns, entry)}
         ppelm_json.append(row_dict)
     return ppelm_json

From 199064e964fa5c43ffdfdc0aa14e9860602418c9 Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Tue, 26 Sep 2023 14:38:00 -0700
Subject: [PATCH 12/14] Handle missing selventa mappings

---
 indra/sources/bel/processor.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/indra/sources/bel/processor.py b/indra/sources/bel/processor.py
index 5a485d6984..49eed505eb 100644
--- a/indra/sources/bel/processor.py
+++ b/indra/sources/bel/processor.py
@@ -547,9 +547,14 @@ def get_db_refs_by_name(ns, name, node_data):
             db_refs = {'UP': up_id}
     # Map Selventa families and complexes to FamPlex
     elif ns == 'SFAM':
-        sfam_id, xrefs = selventa_lookup[('SFAM', name)]
-        db_refs = {'SFAM': sfam_id}
-        indra_name = bel_to_indra.get(name)
+        try:
+            sfam_id, xrefs = selventa_lookup[('SFAM', name)]
+            db_refs = {"SFAM": sfam_id}
+            indra_name = bel_to_indra.get(name)
+        except KeyError:
+            indra_name = None
+            db_refs = None
+
         if indra_name is None:
             logger.info('Could not find mapping for BEL/SFAM family: '
                         '%s (%s)' % (name, node_data))
@@ -614,9 +619,15 @@ def get_db_refs_by_name(ns, name, node_data):
         name = chebi_client.get_chebi_name_from_id(chebi_id)
     # SDIS, SCHEM: Look up the ID and include it in the db_refs
     elif ns in {'SDIS', 'SCHEM'}:
-        sid, xrefs = selventa_lookup[(ns, name)]
-        db_refs = xrefs.copy()
-        db_refs[ns] = sid
+        try:
+            sid, xrefs = selventa_lookup[(ns, name)]
+            db_refs = xrefs.copy()
+            db_refs[ns] = sid
+        except KeyError:
+            logger.info(
+                f"Could not map Selventa name {name} to ID for {ns}."
+            )
+            return name, None
     elif ns == 'TEXT':
         db_refs = {ns: name}
     elif ns == 'TAX':

From 9c396481d8d7c2bc777f9bd7100685adf0dcefd8 Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Wed, 27 Sep 2023 11:27:22 -0700
Subject: [PATCH 13/14] Add new ubibrowser test files

---
 .../tests/test_sources/resources/ubibrowser_dub.txt  | 10 +++++-----
 indra/tests/test_sources/resources/ubibrowser_e3.txt | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/indra/tests/test_sources/resources/ubibrowser_dub.txt b/indra/tests/test_sources/resources/ubibrowser_dub.txt
index ab5d69977d..3d290f03b0 100644
--- a/indra/tests/test_sources/resources/ubibrowser_dub.txt
+++ b/indra/tests/test_sources/resources/ubibrowser_dub.txt
@@ -1,5 +1,5 @@
-NUMBER	E3ID	SUBID	E3AC	SUBAC	E3GENE	SUBGENE	SOURCE	SOURCEID	SENTENCE	E3TYPE	COUNT	type	species
-671	UBP10_HUMAN	PTEN_HUMAN	Q14694	P60484	USP10	PTEN	MEDLINE	28852924	We further demonstrated that USP10 directly interacted with and stabilized PTEN via deubiquitination.	USP	1	DUB	H.sapiens
-673	UBP36_HUMAN	SODM_HUMAN	Q9P275	P04179	USP36	SOD2	MEDLINE	21268071	we identified a deubiquitinating enzyme USP36 that regulates the protein stability of SOD2	USP	1	DUB	H.sapiens
-675	UBP13_HUMAN	UBL4A_HUMAN	Q92995	P11441	USP13	UBL4A	MEDLINE	24424410	we identify USP13 as a gp78-associated DUB that eliminates ubiquitin conjugates from Ubl4A to maintain the functionality of  Bag6.	USP	2	DUB	H.sapiens
-677	UBP33_HUMAN	ARRB1_HUMAN	Q8TEY7	P49407	USP33	ARRB1	MEDLINE	19363159	We now report the discovery that the deubiquitinating enzyme ubiquitin-specific protease 33 (USP33) binds beta-arrestin2 and leads to the deubiquitination of beta-arrestins.	USP	1	DUB	H.sapiens
+NUMBER	SwissProt ID (DUB)	SwissProt ID (Substrate)	SwissProt AC (DUB)	SwissProt AC (Substrate)	Gene Symbol (DUB)	Gene Symbol (Substrate)	SOURCE	SOURCEID	SENTENCE	DUBTYPE	COUNT	type	species
+56	UBP33_HUMAN	ARRB1_HUMAN	Q8TEY7	P49407	USP33	ARRB1	MEDLINE	19363159	"We now report the discovery that the deubiquitinating enzyme ubiquitin-specific protease 33 (<span class=""match term0"">USP33</span>) binds beta-arrestin2 and leads to the deubiquitination of beta-arrestins"	USP	1	Training data	H.sapiens
+129	UBP13_HUMAN	UBL4A_HUMAN	Q92995	P11441	USP13	UBL4A	MEDLINE	24424410	"we identify <span class=""match term0"">USP13</span> as a gp78-associated DUB that eliminates ubiquitin conjugates from <span class=""match term1"">Ubl4A</span> to maintain the functionality of Bag6"	USP	1	Other	H.sapiens
+388	UBP10_HUMAN	PTEN_HUMAN	Q14694	P60484	USP10	PTEN	MEDLINE	28852924	"We further demonstrated that <span class=""match term0"">USP10</span> directly interacted with and stabilized <span class=""match term1"">PTEN</span> via deubiquitination"	USP	1	Training data	H.sapiens
+726	UBP36_HUMAN	SODM_HUMAN	Q9P275	P04179	USP36	SOD2	MEDLINE	21268071	"we identified a deubiquitinating enzyme <span class=""match term0"">USP36</span> that regulates the protein stability of <span class=""match term1"">SOD2</span>"	USP	1	Other	H.sapiens
diff --git a/indra/tests/test_sources/resources/ubibrowser_e3.txt b/indra/tests/test_sources/resources/ubibrowser_e3.txt
index 22a1e52e21..95cd044892 100644
--- a/indra/tests/test_sources/resources/ubibrowser_e3.txt
+++ b/indra/tests/test_sources/resources/ubibrowser_e3.txt
@@ -1,6 +1,6 @@
-NUMBER	E3ID	SUBID	E3AC	SUBAC	E3GENE	SUBGENE	SOURCE	SOURCEID	SENTENCE	E3TYPE	COUNT	type	species
-1	AMFR2_HUMAN	A1AT_HUMAN	Q9UKV5	P01009	AMFR	SERPINA1	MEDLINE	16979136	Here we report that gp78, a ubiquitin ligase (E3) pairing with mammalian Ubc7 for ERAD, ubiquitinates and facilitates degradation of ATZ, the classic deficiency variant of AAT having a Z mutation (Glu 342 Lys)	RING	3	E3	H.sapiens
-15	AMFR2_HUMAN	HMDH_HUMAN	Q9UKV5	P04035	AMFR	HMGCR	MEDLINE	20458442	UBE2G2, a previously known E2 of gp78, is demonstrated to be involved in the sterol-regulated ubiquitination and degradation of HMGCR	RING	4	E3	H.sapiens
-300	BRCA1_HUMAN	BRCA1_HUMAN	P38398	P38398	BRCA1	BRCA1	UniProt	RNF8_BOVIN	Following DNA double-strand breaks (DSBs), it is recruited to the sites of damage by ATM-phosphorylated MDC1, mediates the ubiquitination of histones H2A and H2AX, thereby promoting the formation of TP53BP1 and BRCA1 ionizing radiation-induced foci (IRIF)	RING	21	E3	H.sapiens
-5642	HRD1_CAEEL	Q9BMU4_CAEEL	Q20798	Q9BMU4	sel-11	atln-1	MEDLINE	32916628	UbiNet 2.0	RING	1	E3	C.elegans
-5644	A0A2I4KBP1_DANRE	SHH_DANRE	A0A2I4KBP1	Q92008	gan	shha	MEDLINE	31503551	UbiNet 2.0	other	1	E3	D.rerio
+NUMBER	SwissProt ID (E3)	SwissProt ID (Substrate)	SwissProt AC (E3)	SwissProt AC (Substrate)	Gene Symbol (E3)	Gene Symbol (Substrate)	SOURCE	SOURCEID	SENTENCE	E3TYPE	COUNT	type	species
+109	HRD1_CAEEL	Q9BMU4_CAEEL	Q20798	Q9BMU4	sel-11	atln-1	MEDLINE	32916628	32916628	RING	1	Other	C.elegans
+167	A0A2I4KBP1_DANRE	SHH_DANRE	A0A2I4KBP1	Q92008	gan	shha	MEDLINE	31503551	31503551	Other	1	Other	D.rerio
+198	AMFR2_HUMAN	A1AT_HUMAN	Q9UKV5	P01009	AMFR	SERPINA1	MEDLINE	16979136	"Here we report that <span class=""match term0"">gp78</span>, a ubiquitin ligase (E3) pairing with mammalian Ubc7 for ERAD, ubiquitinates and facilitates degradation of ATZ, the classic deficiency variant of <span class=""match term1"">AAT</span> having a Z mutation (Glu 342 Lys)"	RING	1	Training data	H.sapiens
+1040	GAN_HUMAN	SHH_HUMAN	Q9H2C0	Q15465	GAN	SHH	MEDLINE	31503551	31503551	BTB_3	1	Other	H.sapiens
+2631	SYVN1_HUMAN	ATLA1_HUMAN	Q86TM6	Q8WXF7	SYVN1	ATL1	MEDLINE	32916628	"The E3 Ubiquitin Ligase <span class=""match term0"">SYVN1</span> Ubiquitinates Atlastins to Remodel the Endoplasmic Reticulum Network."	RING	1	Other	H.sapiens

From 5360deac23749024ada36d0f9c69df7c480a9d82 Mon Sep 17 00:00:00 2001
From: kkaris <karis.klas@gmail.com>
Date: Wed, 27 Sep 2023 11:29:10 -0700
Subject: [PATCH 14/14] Update ubibrowser test

---
 indra/tests/test_sources/test_ubibrowser.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/indra/tests/test_sources/test_ubibrowser.py b/indra/tests/test_sources/test_ubibrowser.py
index fefe074b64..fb4c040853 100644
--- a/indra/tests/test_sources/test_ubibrowser.py
+++ b/indra/tests/test_sources/test_ubibrowser.py
@@ -13,14 +13,15 @@
 def test_extract_statements():
     up = ubibrowser.process_file(e3_file, dub_file)
     assert len(up.statements) == 9
-    assert isinstance(up.statements[0], Ubiquitination)
+    assert isinstance(up.statements[2], Ubiquitination)
     assert isinstance(up.statements[-1], Deubiquitination)
 
     assert_valid_statements(up.statements)
 
-    #1	AMFR2_HUMAN	A1AT_HUMAN	Q9UKV5	P01009	AMFR	SERPINA1
-    # MEDLINE	16979136	Here we report that ...	RING	3	E3	H.sapiens
-    e3_stmt = up.statements[0]
+    #198	AMFR2_HUMAN	A1AT_HUMAN	Q9UKV5	P01009	AMFR	SERPINA1
+    # MEDLINE	16979136	"Here we report that ..."	RING	1
+    # "Training data"	H.sapiens
+    e3_stmt = up.statements[2]
     assert e3_stmt.enz.name == 'AMFR'
     assert e3_stmt.enz.db_refs['UP'] == 'Q9UKV5'
     assert e3_stmt.sub.name == 'SERPINA1'
@@ -30,9 +31,10 @@ def test_extract_statements():
     assert e3_stmt.evidence[0].pmid == '16979136'
     assert e3_stmt.evidence[0].text.startswith('Here we report that')
 
-    # 677	UBP33_HUMAN	ARRB1_HUMAN	Q8TEY7	P49407	USP33	ARRB1
-    # MEDLINE	19363159	We now report the discovery that...	USP	1	DUB	H.sapiens
-    dub_stmt = up.statements[-1]
+    # 56	UBP33_HUMAN	ARRB1_HUMAN	Q8TEY7	P49407	USP33	ARRB1	MEDLINE
+    # 19363159	"We now report the discovery that ..."	USP	1	"Training data"
+    # H.sapiens
+    dub_stmt = up.statements[5]
     assert dub_stmt.enz.name == 'USP33'
     assert dub_stmt.enz.db_refs['UP'] == 'Q8TEY7'
     assert dub_stmt.sub.name == 'ARRB1'