From 2cc39ceeb990e92b57997bae39a0202c19eeb4c5 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sun, 26 Nov 2023 10:09:47 -0500 Subject: [PATCH 1/3] Add handling for CollectiveName --- indra/literature/pubmed_client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/indra/literature/pubmed_client.py b/indra/literature/pubmed_client.py index 473e696cf8..8fbdac8109 100644 --- a/indra/literature/pubmed_client.py +++ b/indra/literature/pubmed_client.py @@ -543,6 +543,9 @@ def _parse_author(author_info, include_details=False): parsed_info["suffix"] = element.text elif element.tag == "Identifier": parsed_info["identifier"] = element.text + # This happens for some working groups credited as authors + elif element.tag == "CollectiveName": + parsed_info["collective_name"] = element.text parsed_info["affiliations"] = affiliations return parsed_info @@ -826,7 +829,8 @@ def get_metadata_for_all_ids(pmid_list, get_issns_from_nlm=False, 'journal_abbrev', 'journal_nlm_id', 'issn_list', 'page'. """ all_metadata = {} - for ids in tqdm.tqdm(batch_iter(pmid_list, 200), desc='Retrieving metadata'): + for ids in tqdm.tqdm(batch_iter(pmid_list, 200), desc='Retrieving metadata', + total=len(pmid_list)//200+1): time.sleep(0.1) metadata = get_metadata_for_ids(list(ids), get_issns_from_nlm=get_issns_from_nlm, From 5336d8975d1d47a20fee760753a5380cef793c40 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Fri, 24 Nov 2023 14:15:41 -0500 Subject: [PATCH 2/3] Add some handling for missing tool --- indra/literature/pubmed_client.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/indra/literature/pubmed_client.py b/indra/literature/pubmed_client.py index 8fbdac8109..3468e8d870 100644 --- a/indra/literature/pubmed_client.py +++ b/indra/literature/pubmed_client.py @@ -944,6 +944,12 @@ def get_all_ids(search_term): """ cmd = f'esearch -db pubmed -query "{search_term}" | efetch -format uid' res = subprocess.getoutput(cmd) + if not isinstance(res, str) or "esearch: command not found" in res: + raise RuntimeError("The esearch utility could not be found. " + "This function only works if edirect is " + "installed and is visible on your PATH. " + "See https://www.ncbi.nlm.nih.gov/books/NBK179288/ " + "for instructions.") # Output is divided by new lines elements = res.split('\n') # If there are more than 10k IDs, the CLI outputs a . for each From e5854044a6d4ad77a8d657662bd03b79f72a4c24 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sun, 17 Dec 2023 15:04:22 -0500 Subject: [PATCH 3/3] Generalize missing tool handling --- indra/literature/pubmed_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indra/literature/pubmed_client.py b/indra/literature/pubmed_client.py index 3468e8d870..8a64c2ccfc 100644 --- a/indra/literature/pubmed_client.py +++ b/indra/literature/pubmed_client.py @@ -944,7 +944,7 @@ def get_all_ids(search_term): """ cmd = f'esearch -db pubmed -query "{search_term}" | efetch -format uid' res = subprocess.getoutput(cmd) - if not isinstance(res, str) or "esearch: command not found" in res: + if not isinstance(res, str) or "not found" in res: raise RuntimeError("The esearch utility could not be found. " "This function only works if edirect is " "installed and is visible on your PATH. "