From ed853c1c5dc9c143e8f9e61def3e98a534b0d978 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Tue, 14 Sep 2021 23:35:20 +0100
Subject: [PATCH 01/14] Replace requests with urllib

---
 bin/sra_ids_to_runinfo.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index 1480e8f2..41a5c0ee 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -5,8 +5,9 @@
 import sys
 import csv
 import errno
-import requests
 import argparse
+from urllib.request import urlopen
+from urllib.error import URLError, HTTPError
 
 ## Example ids supported by this script
 SRA_IDS = ['PRJNA63463', 'SAMN00765663', 'SRA023522', 'SRP003255', 'SRR390278', 'SRS282569', 'SRX111814']
@@ -61,13 +62,17 @@ def make_dir(path):
 
 def fetch_url(url, encoding='utf-8'):
     try:
-        r = requests.get(url)
-    except requests.exceptions.RequestException as e:
-        raise SystemExit(e)
-    if r.status_code != 200:
-        print("ERROR: Connection failed\nError code '{}'".format(r.status_code))
+        with urlopen(url) as f:
+            r = f.read().decode(encoding).splitlines()
+    except HTTPError as e:
+        print('The server couldn\'t fulfill the request.')
+        print('Error code: {}'.format(e.code))
         sys.exit(1)
-    return r.content.decode(encoding).splitlines()
+    except URLError as e:
+        print('We failed to reach a server.')
+        print('Reason: {}'.format(e.reason))
+        sys.exit(1)
+    return r
 
 def id_to_srx(db_id):
     ids = []

From 79444d5605404c85ba20404ffdc86185a6c51321 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Tue, 14 Sep 2021 23:35:35 +0100
Subject: [PATCH 02/14] Bump Python version to 3.9.5

---
 modules/local/get_software_versions.nf   | 6 +++---
 modules/local/multiqc_mappings_config.nf | 6 +++---
 modules/local/sra_ids_to_runinfo.nf      | 6 +++---
 modules/local/sra_runinfo_to_ftp.nf      | 6 +++---
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf
index 4d37bd6a..65b7f340 100644
--- a/modules/local/get_software_versions.nf
+++ b/modules/local/get_software_versions.nf
@@ -8,11 +8,11 @@ process GET_SOFTWARE_VERSIONS {
         mode: params.publish_dir_mode,
         saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', meta:[:], publish_by_meta:[]) }
 
-    conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
+    conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
     if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-        container "https://depot.galaxyproject.org/singularity/python:3.8.3"
+        container "https://depot.galaxyproject.org/singularity/python:3.9--1"
     } else {
-        container "quay.io/biocontainers/python:3.8.3"
+        container "quay.io/biocontainers/python:3.9--1"
     }
 
     cache false
diff --git a/modules/local/multiqc_mappings_config.nf b/modules/local/multiqc_mappings_config.nf
index 63121b40..8360cd34 100644
--- a/modules/local/multiqc_mappings_config.nf
+++ b/modules/local/multiqc_mappings_config.nf
@@ -8,11 +8,11 @@ process MULTIQC_MAPPINGS_CONFIG {
         mode: params.publish_dir_mode,
         saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
 
-    conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
+    conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
     if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-        container "https://depot.galaxyproject.org/singularity/python:3.8.3"
+        container "https://depot.galaxyproject.org/singularity/python:3.9--1"
     } else {
-        container "quay.io/biocontainers/python:3.8.3"
+        container "quay.io/biocontainers/python:3.9--1"
     }
 
     input:
diff --git a/modules/local/sra_ids_to_runinfo.nf b/modules/local/sra_ids_to_runinfo.nf
index b277197e..3d3fc063 100644
--- a/modules/local/sra_ids_to_runinfo.nf
+++ b/modules/local/sra_ids_to_runinfo.nf
@@ -10,11 +10,11 @@ process SRA_IDS_TO_RUNINFO {
         mode: params.publish_dir_mode,
         saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
 
-    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+    conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
     if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-        container "https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img"
+        container "https://depot.galaxyproject.org/singularity/python:3.9--1"
     } else {
-        container "biocontainers/biocontainers:v1.2.0_cv1"
+        container "quay.io/biocontainers/python:3.9--1"
     }
 
     input:
diff --git a/modules/local/sra_runinfo_to_ftp.nf b/modules/local/sra_runinfo_to_ftp.nf
index b0421aea..f426f4ab 100644
--- a/modules/local/sra_runinfo_to_ftp.nf
+++ b/modules/local/sra_runinfo_to_ftp.nf
@@ -8,11 +8,11 @@ process SRA_RUNINFO_TO_FTP {
         mode: params.publish_dir_mode,
         saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
 
-    conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
+    conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
     if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-        container "https://depot.galaxyproject.org/singularity/python:3.8.3"
+        container "https://depot.galaxyproject.org/singularity/python:3.9--1"
     } else {
-        container "quay.io/biocontainers/python:3.8.3"
+        container "quay.io/biocontainers/python:3.9--1"
     }
 
     input:

From 74b7f148b5462998d8b5f485ad0bad55f73e2ef3 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Tue, 14 Sep 2021 23:35:43 +0100
Subject: [PATCH 03/14] Update CHANGELOG

---
 CHANGELOG.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index be77eb6f..eae33fae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,18 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [[1.3](https://github.com/nf-core/fetchngs/releases/tag/1.3)] - 2021-09-15
+
+### Enhancements & fixes
+
+* Replaced Python `requests` with `urllib` to fetch ENA metadata
+
+### Software dependencies
+
+| Dependency  | Old version | New version |
+|-------------|-------------|-------------|
+| `python`    | 3.8.3       | 3.9.5       |
+
 ## [[1.2](https://github.com/nf-core/fetchngs/releases/tag/1.2)] - 2021-07-28
 
 ### Enhancements & fixes

From 48c53f268f2d7738df1f9c0c09cc81e378079cf3 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Tue, 14 Sep 2021 23:35:54 +0100
Subject: [PATCH 04/14] Bump pipeline version

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 3b3ea780..52382140 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -147,7 +147,7 @@ manifest {
     description     = 'Pipeline to fetch metadata and raw FastQ files from public databases'
     mainScript      = 'main.nf'
     nextflowVersion = '!>=21.04.0'
-    version         = '1.2'
+    version         = '1.3'
 }
 
 // Function to ensure that resource requirements don't go beyond

From 57a303ca77439a7334b0517303c8bb243c836765 Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 14:21:59 +0200
Subject: [PATCH 05/14] refactor: introduce logging to replace print

---
 bin/sra_ids_to_runinfo.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index 41a5c0ee..24c6af0d 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -6,9 +6,13 @@
 import csv
 import errno
 import argparse
+import logging
 from urllib.request import urlopen
 from urllib.error import URLError, HTTPError
 
+
+logger = logging.getLogger()
+
 ## Example ids supported by this script
 SRA_IDS = ['PRJNA63463', 'SAMN00765663', 'SRA023522', 'SRP003255', 'SRR390278', 'SRS282569', 'SRX111814']
 ENA_IDS = ['ERA2421642', 'ERP120836', 'ERR674736', 'ERS4399631', 'ERX629702', 'PRJEB7743', 'SAMEA3121481']
@@ -48,7 +52,7 @@ def validate_csv_param(param, valid_vals, param_desc):
         if len(intersect) == len(user_vals):
             valid_list = intersect
         else:
-            print("ERROR: Please provide a valid value for {}!\nProvided values = {}\nAccepted values = {}".format(param_desc,param,','.join(valid_vals)))
+            logger.error(f"Please provide a valid value for {param_desc}!\nProvided values = {param}\nAccepted values = {','.join(valid_vals)}")
             sys.exit(1)
     return valid_list
 
@@ -65,12 +69,12 @@ def fetch_url(url, encoding='utf-8'):
         with urlopen(url) as f:
             r = f.read().decode(encoding).splitlines()
     except HTTPError as e:
-        print('The server couldn\'t fulfill the request.')
-        print('Error code: {}'.format(e.code))
+        logger.error("The server couldn't fulfill the request.")
+        logger.error(f"Status: {e.code} {e.reason}")
         sys.exit(1)
     except URLError as e:
-        print('We failed to reach a server.')
-        print('Reason: {}'.format(e.reason))
+        logger.error('We failed to reach a server.')
+        logger.error(f"Reason: {e.reason}")
         sys.exit(1)
     return r
 
@@ -143,7 +147,7 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
                                         fout.write('{}\n'.format('\t'.join(header)))
                                     else:
                                         if header != row.keys():
-                                            print("ERROR: Metadata columns do not match for id {}!\nLine: '{}'".format(run_id,line.strip()))
+                                            logger.error(f"Metadata columns do not match for id {run_id}!\nLine: '{line.strip()}'")
                                             sys.exit(1)
                                     fout.write('{}\n'.format('\t'.join([row[x] for x in header])))
                                     total_out += 1
@@ -151,16 +155,16 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
                         seen_ids.append(db_id)
 
                         if not ids:
-                            print("ERROR: No matches found for database id {}!\nLine: '{}'".format(db_id,line.strip()))
+                            logger.error(f"No matches found for database id {db_id}!\nLine: '{line.strip()}'")
                             sys.exit(1)
 
                 else:
                     id_str = ', '.join([x + "*" for x in PREFIX_LIST])
-                    print("ERROR: Please provide a valid database id starting with {}!\nLine: '{}'".format(id_str,line.strip()))
+                    logger.error(f"Please provide a valid database id starting with {id_str}!\nLine: '{line.strip()}'")
                     sys.exit(1)
             else:
                 id_str = ', '.join([x + "*" for x in PREFIX_LIST])
-                print("ERROR: Please provide a valid database id starting with {}!\nLine: '{}'".format(id_str,line.strip()))
+                logger.error(f"Please provide a valid database id starting with {id_str}!\nLine: '{line.strip()}'")
                 sys.exit(1)
 
 def main(args=None):
@@ -172,4 +176,5 @@ def main(args=None):
     fetch_sra_runinfo(args.FILE_IN, args.FILE_OUT, ena_metadata_fields)
 
 if __name__ == '__main__':
+    logging.basicConfig(level='INFO', format='[%(levelname)s] %(message)s')
     sys.exit(main())

From 2e132dcced092335dd2fcbc5460f4f53e63928d8 Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 14:25:05 +0200
Subject: [PATCH 06/14] refactor: make global constants immutable tuples

---
 bin/sra_ids_to_runinfo.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index 24c6af0d..eadfd8e5 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -14,15 +14,15 @@
 logger = logging.getLogger()
 
 ## Example ids supported by this script
-SRA_IDS = ['PRJNA63463', 'SAMN00765663', 'SRA023522', 'SRP003255', 'SRR390278', 'SRS282569', 'SRX111814']
-ENA_IDS = ['ERA2421642', 'ERP120836', 'ERR674736', 'ERS4399631', 'ERX629702', 'PRJEB7743', 'SAMEA3121481']
-GEO_IDS = ['GSE18729', 'GSM465244']
+SRA_IDS = ('PRJNA63463', 'SAMN00765663', 'SRA023522', 'SRP003255', 'SRR390278', 'SRS282569', 'SRX111814')
+ENA_IDS = ('ERA2421642', 'ERP120836', 'ERR674736', 'ERS4399631', 'ERX629702', 'PRJEB7743', 'SAMEA3121481')
+GEO_IDS = ('GSE18729', 'GSM465244')
 ID_REGEX = r'^[A-Z]+'
 PREFIX_LIST = sorted(list(set([re.search(ID_REGEX,x).group() for x in SRA_IDS + ENA_IDS + GEO_IDS])))
 
 ## List of meta fields fetched from the ENA API - can be overriden by --ena_metadata_fields
 ## Full list of accepted fields can be obtained here: https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run
-ENA_METADATA_FIELDS = [
+ENA_METADATA_FIELDS = (
     'accession', 'run_accession', 'experiment_accession', 'sample_accession', 'secondary_sample_accession', 'study_accession', 'secondary_study_accession', 'parent_study', 'submission_accession',
     'run_alias', 'experiment_alias', 'sample_alias', 'study_alias',
     'library_layout', 'library_selection', 'library_source', 'library_strategy', 'library_name',
@@ -32,7 +32,7 @@
     'sample_title', 'experiment_title', 'study_title',
     'description', 'sample_description',
     'fastq_md5', 'fastq_bytes', 'fastq_ftp', 'fastq_galaxy', 'fastq_aspera'
-]
+ )
 
 def parse_args(args=None):
     Description = 'Download and create a run information metadata file from SRA/ENA/GEO identifiers.'

From 99c9a620096a1d9e41a1aa664d2abc85f7d8eb1a Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 14:29:49 +0200
Subject: [PATCH 07/14] refactor: use compiled regex pattern

---
 bin/sra_ids_to_runinfo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index eadfd8e5..c70df48d 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -17,8 +17,8 @@
 SRA_IDS = ('PRJNA63463', 'SAMN00765663', 'SRA023522', 'SRP003255', 'SRR390278', 'SRS282569', 'SRX111814')
 ENA_IDS = ('ERA2421642', 'ERP120836', 'ERR674736', 'ERS4399631', 'ERX629702', 'PRJEB7743', 'SAMEA3121481')
 GEO_IDS = ('GSE18729', 'GSM465244')
-ID_REGEX = r'^[A-Z]+'
-PREFIX_LIST = sorted(list(set([re.search(ID_REGEX,x).group() for x in SRA_IDS + ENA_IDS + GEO_IDS])))
+ID_REGEX = re.compile(r'[A-Z]+')
+PREFIX_LIST = sorted({ID_REGEX.match(x).group() for x in SRA_IDS + ENA_IDS + GEO_IDS})
 
 ## List of meta fields fetched from the ENA API - can be overriden by --ena_metadata_fields
 ## Full list of accepted fields can be obtained here: https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run
@@ -116,7 +116,7 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
     with open(file_in,"r") as fin, open(file_out,"w") as fout:
         for line in fin:
             db_id = line.strip()
-            match = re.search(ID_REGEX, db_id)
+            match = ID_REGEX.match(db_id)
             if match:
                 prefix = match.group()
                 if prefix in PREFIX_LIST:

From 2ea9dd7173e449f93650169936f195a4ec985ac2 Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 14:32:54 +0200
Subject: [PATCH 08/14] refactor: use set for better lookup performance

---
 bin/sra_ids_to_runinfo.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index c70df48d..f90f7d9d 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -110,7 +110,8 @@ def get_ena_fields():
 
 def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS):
     total_out = 0
-    seen_ids = []; run_ids = []
+    seen_ids = set()
+    run_ids = set()
     header = []
     make_dir(os.path.dirname(file_out))
     with open(file_in,"r") as fin, open(file_out,"w") as fout:
@@ -120,7 +121,7 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
             if match:
                 prefix = match.group()
                 if prefix in PREFIX_LIST:
-                    if not db_id in seen_ids:
+                    if db_id not in seen_ids:
 
                         ids = [db_id]
                         ## Resolve/expand these ids against GEO URL
@@ -141,7 +142,7 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
                             csv_dict = csv.DictReader(fetch_url(url), delimiter='\t')
                             for row in csv_dict:
                                 run_id = row['run_accession']
-                                if not run_id in run_ids:
+                                if run_id not in run_ids:
                                     if total_out == 0:
                                         header = row.keys()
                                         fout.write('{}\n'.format('\t'.join(header)))
@@ -151,8 +152,8 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
                                             sys.exit(1)
                                     fout.write('{}\n'.format('\t'.join([row[x] for x in header])))
                                     total_out += 1
-                                    run_ids.append(run_id)
-                        seen_ids.append(db_id)
+                                    run_ids.add(run_id)
+                        seen_ids.add(db_id)
 
                         if not ids:
                             logger.error(f"No matches found for database id {db_id}!\nLine: '{line.strip()}'")

From 473b2378f51104148b61b2737c39357e0c9a10a3 Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 15:19:04 +0200
Subject: [PATCH 09/14] feat: enable decompression and decoding from HTTP
 headers

---
 bin/sra_ids_to_runinfo.py | 84 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 80 insertions(+), 4 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index f90f7d9d..1aef2cfe 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -7,6 +7,9 @@
 import errno
 import argparse
 import logging
+import gzip
+import zlib
+import cgi
 from urllib.request import urlopen
 from urllib.error import URLError, HTTPError
 
@@ -34,6 +37,79 @@
     'fastq_md5', 'fastq_bytes', 'fastq_ftp', 'fastq_galaxy', 'fastq_aspera'
  )
 
+
+class Response:
+    """
+    Define an HTTP response class.
+
+    This class should not have to be instantiated directly.
+
+    Attributes:
+        status (int): The numeric HTTP status code of the response.
+        reason (str): The response's reason phrase.
+        body (bytes): The response's decompressed body content as bytes.
+
+    Methods:
+        text: The response's body as a decoded string.
+
+    """
+
+    def __init__(self, *, response, **kwargs) -> None:
+        """
+        Initialize an HTTP response object.
+
+        Args:
+            response (http.client.HTTPResponse): A standard library response object
+                that is wrapped by this class.
+            **kwargs: Passed to parent classes.
+
+        """
+        super().__init__(**kwargs)
+        self._response = response
+        # Immediately read the body while the response context is still available.
+        self._raw = self._response.read()
+        self._content = None
+
+    def _decompress(self):
+        """Decompress the response body if necessary."""
+        method = self._response.getheader("Content-Encoding", "")
+        if not method:
+            self._content = self._raw
+            return
+        if method == "gzip":
+            self._content = gzip.decompress(self._raw)
+        elif method == "deflate":
+            self._content = zlib.decompress(self._raw)
+        else:
+            raise ValueError(f"Unsupported compression: {method}")
+
+    @property
+    def status(self):
+        """Get the response's HTTP status code."""
+        return self._response.status
+
+    @property
+    def reason(self):
+        """Get the response's reason phrase."""
+        return self._response.reason
+
+    @property
+    def body(self):
+        """Get the response's decompressed body content as bytes."""
+        if self._content is None:
+            self._decompress()
+        return self._content
+
+    def text(self, encoding=None):
+        """Return the response's body as a decoded string."""
+        if encoding is not None:
+            return self._content.decode(encoding)
+
+        _, params = cgi.parse_header(self._response.getheader("Content-Type", ""))
+        encoding = params.get("charset", "utf-8")
+        return self._content.decode(encoding)
+
+
 def parse_args(args=None):
     Description = 'Download and create a run information metadata file from SRA/ENA/GEO identifiers.'
     Epilog = 'Example usage: python fetch_sra_runinfo.py <FILE_IN> <FILE_OUT>'
@@ -64,10 +140,10 @@ def make_dir(path):
             if exception.errno != errno.EEXIST:
                 raise
 
-def fetch_url(url, encoding='utf-8'):
+def fetch_url(url):
     try:
-        with urlopen(url) as f:
-            r = f.read().decode(encoding).splitlines()
+        with urlopen(url) as response:
+            result = Response(response=response).text().splitlines()
     except HTTPError as e:
         logger.error("The server couldn't fulfill the request.")
         logger.error(f"Status: {e.code} {e.reason}")
@@ -76,7 +152,7 @@ def fetch_url(url, encoding='utf-8'):
         logger.error('We failed to reach a server.')
         logger.error(f"Reason: {e.reason}")
         sys.exit(1)
-    return r
+    return result
 
 def id_to_srx(db_id):
     ids = []

From 7798aa1a63d68e9162c7dc4714f78a9775dc7011 Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 15:25:33 +0200
Subject: [PATCH 10/14] refactor: use f-strings where possible

---
 bin/sra_ids_to_runinfo.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index 1aef2cfe..4b4ed2d7 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -117,7 +117,7 @@ def parse_args(args=None):
     parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
     parser.add_argument('FILE_IN', help="File containing database identifiers, one per line.")
     parser.add_argument('FILE_OUT', help="Output file in tab-delimited format.")
-    parser.add_argument('-ef', '--ena_metadata_fields', type=str, dest="ENA_METADATA_FIELDS", default='', help="Comma-separated list of ENA metadata fields to fetch. (default: {}).".format(','.join(ENA_METADATA_FIELDS)))
+    parser.add_argument('-ef', '--ena_metadata_fields', type=str, dest="ENA_METADATA_FIELDS", default='', help=f"Comma-separated list of ENA metadata fields to fetch. (default: {','.join(ENA_METADATA_FIELDS)}).")
     return parser.parse_args(args)
 
 def validate_csv_param(param, valid_vals, param_desc):
@@ -220,13 +220,15 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
                                 run_id = row['run_accession']
                                 if run_id not in run_ids:
                                     if total_out == 0:
-                                        header = row.keys()
-                                        fout.write('{}\n'.format('\t'.join(header)))
+                                        header = '\t'.join(row.keys())
+                                        fout.write(f"{header}\n")
                                     else:
                                         if header != row.keys():
                                             logger.error(f"Metadata columns do not match for id {run_id}!\nLine: '{line.strip()}'")
                                             sys.exit(1)
-                                    fout.write('{}\n'.format('\t'.join([row[x] for x in header])))
+
+                                    ordered_row = '\t'.join([row[x] for x in header])
+                                    fout.write(f'{ordered_row}\n')
                                     total_out += 1
                                     run_ids.add(run_id)
                         seen_ids.add(db_id)

From eaca802ba4ee15883a049c523b5a7c104347cd7e Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 15:52:17 +0200
Subject: [PATCH 11/14] refactor: manage URLs with encoding

---
 bin/sra_ids_to_runinfo.py | 79 +++++++++++++++++++++++++--------------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index 4b4ed2d7..87fe51f1 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -1,18 +1,18 @@
 #!/usr/bin/env python
 
-import os
-import re
-import sys
+import argparse
+import cgi
 import csv
 import errno
-import argparse
-import logging
 import gzip
+import logging
+import os
+import re
+import sys
 import zlib
-import cgi
-from urllib.request import urlopen
 from urllib.error import URLError, HTTPError
-
+from urllib.parse import urlencode
+from urllib.request import urlopen
 
 logger = logging.getLogger()
 
@@ -155,34 +155,53 @@ def fetch_url(url):
     return result
 
 def id_to_srx(db_id):
-    ids = []
-    url = 'https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?save=efetch&db=sra&rettype=runinfo&term={}'.format(db_id)
-    for row in csv.DictReader(fetch_url(url), delimiter=','):
-        ids.append(row['Experiment'])
-    return ids
+    params = {
+        "save": "efetch",
+        "db": "sra",
+        "rettype": "runinfo",
+        "term": db_id
+    }
+    url = f'https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?{urlencode(params)}'
+    return [
+        row['Experiment'] for row in csv.DictReader(fetch_url(url), delimiter=',')
+    ]
 
 def id_to_erx(db_id):
-    ids = []
     fields = ['run_accession', 'experiment_accession']
-    url = 'https://www.ebi.ac.uk/ena/portal/api/filereport?accession={}&result=read_run&fields={}'.format(db_id,','.join(fields))
-    for row in csv.DictReader(fetch_url(url), delimiter='\t'):
-        ids.append(row['experiment_accession'])
-    return ids
+    params = {
+        "accession": db_id,
+        "result": "read_run",
+        "fields": ",".join(fields)
+    }
+    url = f'https://www.ebi.ac.uk/ena/portal/api/filereport?{urlencode(params)}'
+    return [
+        row['experiment_accession'] for row in csv.DictReader(fetch_url(url), delimiter='\t')
+    ]
 
 def gse_to_srx(db_id):
     ids = []
-    url = 'https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={}&targ=gsm&view=data&form=text'.format(db_id)
-    gsm_ids = [x.split('=')[1].strip() for x in fetch_url(url) if x.find('GSM') != -1]
+    params = {
+        "acc": db_id,
+        "targ": "gsm",
+        "view": "data",
+        "form": "text"
+    }
+    url = f'https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?{urlencode(params)}'
+    gsm_ids = [x.split('=')[1].strip() for x in fetch_url(url) if 'GSM' in x]  # id follows '=', so test containment, not prefix
     for gsm_id in gsm_ids:
         ids += id_to_srx(gsm_id)
     return ids
 
 def get_ena_fields():
-    fields = []
-    url = 'https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run'
-    for row in csv.DictReader(fetch_url(url), delimiter='\t'):
-        fields.append(row['columnId'])
-    return fields
+    params = {
+        "dataPortal": "ena",
+        "format": "tsv",
+        "result": "read_run"
+    }
+    url = f'https://www.ebi.ac.uk/ena/portal/api/returnFields?{urlencode(params)}'
+    return [
+        row['columnId'] for row in csv.DictReader(fetch_url(url), delimiter='\t')
+    ]
 
 def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS):
     total_out = 0
@@ -190,6 +209,10 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
     run_ids = set()
     header = []
     make_dir(os.path.dirname(file_out))
+    params = {
+        "result": "read_run",
+        "fields": ','.join(ena_metadata_fields)
+    }
     with open(file_in,"r") as fin, open(file_out,"w") as fout:
         for line in fin:
             db_id = line.strip()
@@ -214,9 +237,9 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
 
                         ## Resolve/expand to get run identifier from ENA and write to file
                         for id in ids:
-                            url = 'https://www.ebi.ac.uk/ena/portal/api/filereport?accession={}&result=read_run&fields={}'.format(id,','.join(ena_metadata_fields))
-                            csv_dict = csv.DictReader(fetch_url(url), delimiter='\t')
-                            for row in csv_dict:
+                            params["accession"] = id
+                            url = f'https://www.ebi.ac.uk/ena/portal/api/filereport?{urlencode(params)}'
+                            for row in csv.DictReader(fetch_url(url), delimiter='\t'):
                                 run_id = row['run_accession']
                                 if run_id not in run_ids:
                                     if total_out == 0:

From fa38ddaacd1cc48c6c1d6616a0116885fcc6f0be Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 16:07:31 +0200
Subject: [PATCH 12/14] fix: maintain header list in variable, not string

---
 bin/sra_ids_to_runinfo.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index 87fe51f1..11fe5247 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -103,11 +103,11 @@ def body(self):
     def text(self, encoding=None):
         """Return the response's body as a decoded string."""
         if encoding is not None:
-            return self._content.decode(encoding)
+            return self.body.decode(encoding)
 
         _, params = cgi.parse_header(self._response.getheader("Content-Type", ""))
         encoding = params.get("charset", "utf-8")
-        return self._content.decode(encoding)
+        return self.body.decode(encoding)
 
 
 def parse_args(args=None):
@@ -243,8 +243,9 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS
                                 run_id = row['run_accession']
                                 if run_id not in run_ids:
                                     if total_out == 0:
-                                        header = '\t'.join(row.keys())
-                                        fout.write(f"{header}\n")
+                                        header = row.keys()
+                                        header_line = '\t'.join(header)
+                                        fout.write(f"{header_line}\n")
                                     else:
                                         if header != row.keys():
                                             logger.error(f"Metadata columns do not match for id {run_id}!\nLine: '{line.strip()}'")

From 1f7ae6a7cb17e681accf2940460483043ff3b69b Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 16:13:53 +0200
Subject: [PATCH 13/14] style: remove space

---
 bin/sra_ids_to_runinfo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index 11fe5247..68b1c924 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -35,7 +35,7 @@
     'sample_title', 'experiment_title', 'study_title',
     'description', 'sample_description',
     'fastq_md5', 'fastq_bytes', 'fastq_ftp', 'fastq_galaxy', 'fastq_aspera'
- )
+)
 
 
 class Response:

From ce7ed6016ef67377651960b8fb212a80dcdca4ea Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber" <midnighter@posteo.net>
Date: Wed, 15 Sep 2021 18:04:27 +0200
Subject: [PATCH 14/14] style: remove last type annotation

---
 bin/sra_ids_to_runinfo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index 68b1c924..ae60f545 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -54,7 +54,7 @@ class Response:
 
     """
 
-    def __init__(self, *, response, **kwargs) -> None:
+    def __init__(self, *, response, **kwargs):
         """
         Initialize an HTTP response object.