Skip to content

Commit

Permalink
Added flake8 style check and made code compliant
Browse files Browse the repository at this point in the history
  • Loading branch information
mluypaert committed Mar 14, 2024
1 parent 3b0dca6 commit 2207ad6
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 26 deletions.
24 changes: 22 additions & 2 deletions .github/workflows/PR-validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ jobs:
- name: Build container image
run: |
make container-image
pipeline-seq-retrieval-check-python-typing:
name: pipeline/seq_retrieval check python typing
pipeline-seq-retrieval-python-typing-check:
name: pipeline/seq_retrieval python typing check
runs-on: ubuntu-22.04
defaults:
run:
Expand All @@ -42,5 +42,25 @@ jobs:
- name: Check python typing
run: |
make check-python-typing
pipeline-seq-retrieval-python-style-check:
name: pipeline/seq_retrieval python style check
runs-on: ubuntu-22.04
defaults:
run:
shell: bash
working-directory: ./pipeline/seq_retrieval/
steps:
- name: Check out repository code
uses: actions/checkout@v4
with:
fetch-depth: 0
sparse-checkout: |
pipeline/seq_retrieval/
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Check python style
run: |
make check-python-style
#TODO: add unit testing
#TODO: add integration testing
6 changes: 6 additions & 0 deletions pipeline/seq_retrieval/.flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[flake8]
ignore = E203, E266, E501, F401, W503, E501
max-complexity = 18
select = B,C,E,F,W,T4
exclude = venv
per-file-ignores = __init__.py:F401
7 changes: 6 additions & 1 deletion pipeline/seq_retrieval/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@ python-dependencies:
python-dependencies-update:
pip install -U -r requirements.txt

check-python-typing: python-dependencies
python-test-dependencies:
pip install -r tests/requirements.txt

check-python-typing: python-dependencies python-test-dependencies
mypy --install-types --non-interactive src/main.py

check-python-style: python-dependencies python-test-dependencies
flake8 ./

check-venv-active:
ifeq ($(VIRTUAL_ENV),)
@echo 'No active python virtual environment found.'\
Expand Down
17 changes: 11 additions & 6 deletions pipeline/seq_retrieval/src/data_mover/data_file_mover.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
Change the value through the `set_local_cache_reuse` function.
"""


def set_local_cache_reuse(reuse: bool) -> None:
"""
Define data_file_mover module-level behaviour on local file cache reuse.
Expand All @@ -36,6 +37,7 @@ def set_local_cache_reuse(reuse: bool) -> None:
global _reuse_local_cache
_reuse_local_cache = reuse


def is_accessible_url(url: str) -> bool:
"""
Check whether provided `url` is an accessible (remote) URL
Expand All @@ -52,6 +54,7 @@ def is_accessible_url(url: str) -> bool:
else:
return False


def fetch_file(url: str, dest_dir: str = _DEFAULT_DIR, reuse_local_cache: Optional[bool] = None) -> str:
"""
Fetch file from URL and return its local path.
Expand Down Expand Up @@ -83,6 +86,7 @@ def fetch_file(url: str, dest_dir: str = _DEFAULT_DIR, reuse_local_cache: Option

return local_path


def find_local_file(path: str) -> str:
"""
Find a file locally based on path and return its absolute path.
Expand All @@ -104,7 +108,8 @@ def find_local_file(path: str) -> str:
else:
return str(Path(path).resolve())

def download_from_url(url: str, dest_dir: str = _DEFAULT_DIR, chunk_size = 10 * 1024, reuse_local_cache: Optional[bool] = None) -> str:

def download_from_url(url: str, dest_dir: str = _DEFAULT_DIR, chunk_size: int = 10 * 1024, reuse_local_cache: Optional[bool] = None) -> str:
"""
Download file from remote URL and return its absolute local path.
Expand All @@ -125,7 +130,7 @@ def download_from_url(url: str, dest_dir: str = _DEFAULT_DIR, chunk_size = 10 *
`ValueError`: if `url` scheme is not supported or `url` is not accessible.
"""

if reuse_local_cache == None:
if reuse_local_cache is None:
reuse_local_cache = _reuse_local_cache

url_components = urlparse(url)
Expand All @@ -140,13 +145,13 @@ def download_from_url(url: str, dest_dir: str = _DEFAULT_DIR, chunk_size = 10 *
local_file_path = os.path.join(dest_dir, filename)

if os.path.exists(local_file_path) and os.path.isfile(local_file_path):
if reuse_local_cache == True:
#Return the local file path without downloading new content
if reuse_local_cache is True:
# Return the local file path without downloading new content
return str(Path(local_file_path).resolve())
else:
os.remove(local_file_path)

#Download file through streaming to support large files
# Download file through streaming to support large files
tmp_file_path = f"{local_file_path}.part"
response = requests.get(url, stream=True)

Expand All @@ -158,5 +163,5 @@ def download_from_url(url: str, dest_dir: str = _DEFAULT_DIR, chunk_size = 10 *

return str(Path(local_file_path).resolve())
else:
#Currently not supported
# Currently not supported
raise ValueError(f"URL with scheme '{url_components.scheme}' is currently not supported.")
18 changes: 11 additions & 7 deletions pipeline/seq_retrieval/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import data_mover.data_file_mover as data_file_mover
from seq_region import SeqRegion, chain_seq_region_seqs


def validate_strand_param(ctx, param, value: str) -> Literal['+', '-']:
"""
Processes and normalises the value of click input argument `strand`.
Expand All @@ -22,14 +23,15 @@ def validate_strand_param(ctx, param, value: str) -> Literal['+', '-']:
"""
POS_CHOICES = ['+', '+1', 'pos']
NEG_CHOICES = ['-', '-1', 'neg']
if value in POS_CHOICES:
if value in POS_CHOICES:
return '+'
elif value in NEG_CHOICES:
return '-'
else:
raise click.BadParameter(f"Must be one of {POS_CHOICES} for positive strand, or {NEG_CHOICES} for negative strand.")

def process_seq_regions_param(ctx, param, value: str) -> List[Dict[str,Any]]:

def process_seq_regions_param(ctx, param, value: str) -> List[Dict[str, Any]]:
"""
Parse the value of click input parameter seq_regions and validate it's structure.
Expand All @@ -47,8 +49,8 @@ def process_seq_regions_param(ctx, param, value: str) -> List[Dict[str,Any]]:
seq_regions = None
try:
seq_regions = json.loads(value)
except:
raise click.BadParameter(f"Must be a valid JSON-formatted string.")
except Exception:
raise click.BadParameter("Must be a valid JSON-formatted string.")
else:
if not isinstance(seq_regions, list):
raise click.BadParameter("Must be a valid list (JSON-array) of sequence regions to retrieve.")
Expand All @@ -66,6 +68,7 @@ def process_seq_regions_param(ctx, param, value: str) -> List[Dict[str,Any]]:

return seq_regions


@click.command()
@click.option("--seq_id", type=click.STRING, required=True,
help="The sequence ID to retrieve sequences for.")
Expand Down Expand Up @@ -96,15 +99,16 @@ def main(seq_id: str, seq_strand: str, seq_regions: List, fasta_file_url: str, r
seq_region_objs = []
for region in seq_regions:
seq_region_objs.append(SeqRegion(seq_id=seq_id, start=region['start'], end=region['end'], strand=seq_strand,
fasta_file_url=fasta_file_url))
fasta_file_url=fasta_file_url))

for seq_region in seq_region_objs:
#Retrieve sequence for region
# Retrieve sequence for region
seq_region.fetch_seq()

#Concatenate all regions into single sequence
# Concatenate all regions into single sequence
seq_concat = chain_seq_region_seqs(seq_region_objs, seq_strand)
click.echo(f"\nSeq concat: {seq_concat}")


if __name__ == '__main__':
main()
2 changes: 1 addition & 1 deletion pipeline/seq_retrieval/src/seq_region/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .seq_region import SeqRegion
from .seq_region import chain_seq_region_seqs
from .seq_region import chain_seq_region_seqs
17 changes: 9 additions & 8 deletions pipeline/seq_retrieval/src/seq_region/seq_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
"""
from typing import Any, Dict, List, Optional

from Bio import Seq #Bio.Seq biopython submodule
from Bio import Seq # Bio.Seq biopython submodule
import pysam

from data_mover import data_file_mover


class SeqRegion():
"""
Defines a DNA sequence region.
Expand Down Expand Up @@ -52,15 +53,15 @@ def __init__(self, seq_id: str, start: int, end: int, strand: str, fasta_file_ur
self.seq_id = seq_id
self.strand = strand

#If strand is -, ensure start <= end (swap as required)
# If strand is -, ensure start <= end (swap as required)
if strand == '-':
if end < start:
self.start = end
self.end = start
else:
self.start = start
self.end = end
#If strand is +, throw error when end < start (likely user error)
# If strand is +, throw error when end < start (likely user error)
else:
if end < start:
raise ValueError(f"Unexpected position order: end {end} < start {start}.")
Expand All @@ -73,19 +74,18 @@ def __init__(self, seq_id: str, start: int, end: int, strand: str, fasta_file_ur

# Fetch additional faidx index files in addition to fasta file itself
# (to the same location)
index_files = [fasta_file_url+'.fai']
index_files = [fasta_file_url + '.fai']
if fasta_file_url.endswith('.gz'):
index_files.append(fasta_file_url+'.gzi')
index_files.append(fasta_file_url + '.gzi')

for index_file in index_files:
data_file_mover.fetch_file(index_file)

self.fasta_file_path = local_fasta_file_path

if seq != None:
if seq is not None:
self.sequence = seq


def fetch_seq(self) -> None:
"""
Fetch sequence found at `seq_id`:`start`-`end`:`strand`
Expand All @@ -100,7 +100,7 @@ def fetch_seq(self) -> None:
except IOError:
raise IOError(f"Error while reading fasta file or index matching path {self.fasta_file_path}.")
else:
seq = fasta_file.fetch(reference=self.seq_id, start=self.start-1, end=self.end)
seq = fasta_file.fetch(reference=self.seq_id, start=(self.start - 1), end=self.end)
fasta_file.close()

if self.strand == '-':
Expand Down Expand Up @@ -132,6 +132,7 @@ def get_sequence(self) -> str:
"""Return `sequence` attribute as a string (empty string if `None`)."""
return str(self.sequence)


def chain_seq_region_seqs(seq_regions: List[SeqRegion], seq_strand: str) -> str:
"""
Chain multiple SeqRegions' sequenes together into one continuous sequence.
Expand Down
3 changes: 2 additions & 1 deletion pipeline/seq_retrieval/tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
mypy==1.9.*
flake8==7.0.*
mypy==1.9.*

0 comments on commit 2207ad6

Please sign in to comment.