Skip to content

Commit

Permalink
Merge pull request #159 from pdimens/cli_validations
Browse files Browse the repository at this point in the history
add command-line level validations
  • Loading branch information
pdimens authored Nov 5, 2024
2 parents 6b98fdf + 408114b commit 31c905d
Show file tree
Hide file tree
Showing 24 changed files with 537 additions and 266 deletions.
3 changes: 2 additions & 1 deletion .github/filters.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
common: &common
- 'harpy/_launch.py'
- 'harpy/_misc.py'
- 'harpy/_cli_types.py'
- 'harpy/_cli_types_generic.py'
- 'harpy/_cli_types_params.py'
- 'harpy/_parsers.py'
- 'harpy/_printing.py'
- 'harpy/_validations.py'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ jobs:
path: .snakemake/singularity
- name: harpy qc
shell: micromamba-shell {0}
run: harpy qc -x "--trim_poly_g" --quiet test/fastq
run: harpy qc -x "--low_complexity_filter" --quiet test/fastq
- name: harpy qc all options
shell: micromamba-shell {0}
run: harpy qc -a -d -c 21,40,3,0 --quiet test/fastq
Expand Down Expand Up @@ -344,7 +344,7 @@ jobs:
path: .snakemake/singularity
- name: test ema
shell: micromamba-shell {0}
run: harpy align ema --quiet --ema-bins 150 -g test/genome/genome.fasta.gz -x "-d" test/fastq
run: harpy align ema --quiet --ema-bins 150 -g test/genome/genome.fasta.gz test/fastq

strobe:
needs: [changes, pkgbuild]
Expand Down
54 changes: 0 additions & 54 deletions harpy/_cli_types.py

This file was deleted.

117 changes: 117 additions & 0 deletions harpy/_cli_types_generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""Module with python-click types for command-line level validations of inputs"""

import os
import click

class IntList(click.ParamType):
    """A click type which accepts a fixed number of integers separated by commas.

    The converted value is a list of ``int``.
    """
    name = "int_list"

    def __init__(self, max_entries):
        """Remember how many integers the option must receive.

        Args:
            max_entries: the exact number of comma-separated integers required.
                Despite the name, inputs with fewer entries are rejected too.
        """
        super().__init__()
        self.max_entries = max_entries

    def convert(self, value, param, ctx):
        """Convert ``value`` into a list of exactly ``self.max_entries`` integers.

        Args:
            value: a comma-separated string such as ``"21,40,3,0"``, or an
                iterable of values that are already integers/integer strings.
            param: the click parameter being processed (forwarded to ``self.fail``).
            ctx: the click context (forwarded to ``self.fail``).

        Returns:
            A list of ``int``.

        Any malformed input is reported through ``self.fail``.
        """
        # Values that were already converted (e.g. defaults) are validated as-is.
        raw = value.split(",") if isinstance(value, str) else value
        try:
            # int() tolerates surrounding whitespace, so no explicit strip is needed.
            numbers = [int(i) for i in raw]
        except (TypeError, ValueError):
            numbers = None
        if numbers is None or len(numbers) != self.max_entries:
            self.fail(f"{value} is not a valid list of integers. The value should be {self.max_entries} integers separated by a comma.", param, ctx)
        return numbers

class KParam(click.ParamType):
    """A click type which accepts odd integers separated by commas, or the word auto."""
    name = "k_param"

    def convert(self, value, param, ctx):
        """Convert ``value`` into ``"auto"`` or a list of odd k values.

        Args:
            value: the literal string ``"auto"``, a comma-separated string of
                integers, or an iterable of integers/integer strings.
            param: the click parameter being processed (forwarded to ``self.fail``).
            ctx: the click context (forwarded to ``self.fail``).

        Returns:
            ``"auto"`` unchanged, otherwise a list of ``int``.

        Input that is not ``"auto"`` and contains anything other than positive
        odd integers no greater than 128 is reported through ``self.fail``.
        """
        if value == "auto":
            return value
        # Values that were already converted (e.g. defaults) are validated as-is.
        raw = value.split(",") if isinstance(value, str) else value
        try:
            sizes = [int(i) for i in raw]
        except (TypeError, ValueError):
            sizes = None
        # k < 1 rejects negative odd numbers, which would otherwise slip through
        # the parity check (0 is already excluded because it is even).
        if sizes is None or any(k < 1 or k % 2 == 0 or k > 128 for k in sizes):
            self.fail(f"{value} is not 'auto' or odd integers <128 separated by a comma.", param, ctx)
        return sizes

class ContigList(click.ParamType):
    """A click type which accepts a file of contigs or a list of contigs separated by a comma."""
    name = "contig_list"

    def convert(self, value, param, ctx):
        """Convert ``value`` into a list of contig names.

        Args:
            value: either a path to an existing file with one contig name per
                line, or a comma-separated string of contig names.
            param: the click parameter being processed (forwarded to ``self.fail``).
            ctx: the click context (forwarded to ``self.fail``).

        Returns:
            A list of whitespace-stripped, non-empty contig names.

        A path that exists but is not a regular file is reported through
        ``self.fail``.
        """
        # An existing path is treated as a file of contigs; anything else is
        # interpreted as an inline comma-separated list.
        if os.path.exists(value):
            if not os.path.isfile(value):
                self.fail(f"{value} is not a file.", param, ctx)
            with open(value, "r") as cont_in:
                names = [line.strip() for line in cont_in]
        else:
            names = [i.strip() for i in value.split(',')]
        # Blank lines and stray commas would otherwise yield empty contig names.
        return [name for name in names if name]

class InputFile(click.ParamType):
    """A click type that verifies that a file exists and that it has an expected extension"""
    name = "input_file"

    def __init__(self, filetype, gzip_ok):
        """Configure which family of extensions is accepted.

        Args:
            filetype: one of ``"fasta"``, ``"vcf"`` or ``"gff"``.
            gzip_ok: when true, each accepted extension may also be followed
                by ``.gz``.
        """
        super().__init__()
        self.filetype = filetype
        self.gzip_ok = gzip_ok

    def convert(self, value, param, ctx):
        """Validate that ``value`` is a readable file with an accepted extension.

        Args:
            value: path given on the command line.
            param: the click parameter being processed (forwarded to ``self.fail``).
            ctx: the click context (forwarded to ``self.fail``).

        Returns:
            ``value`` unchanged when every check passes.

        A missing, unreadable, directory or wrongly-named path is reported
        through ``self.fail``; so is a ``filetype`` without a known extension list.
        """
        # Every extension carries its leading dot so that e.g. "notvcf" does
        # not pass as a VCF file.
        filedict = {
            "fasta": [".fasta", ".fas", ".fa", ".fna", ".ffn", ".faa", ".frn"],
            "vcf": [".vcf", ".bcf", ".vcf.gz"],
            "gff": [".gff", ".gff3"],
        }
        if self.filetype not in filedict:
            self.fail(f"Extension validation for {self.filetype} is not yet implemented. This error should only appear during development; if you are a user and seeing this, please post an issue on GitHub: https://github.com/pdimens/harpy/issues/new?assignees=&labels=bug&projects=&template=bug_report.yml", param, ctx)
        if not os.path.exists(value):
            self.fail(f"{value} does not exist. Please check the spelling and try again.", param, ctx)
        if not os.access(value, os.R_OK):
            self.fail(f"{value} is not readable. Please check file/directory permissions and try again", param, ctx)
        if os.path.isdir(value):
            self.fail(f"{value} is a directory, but input should be a file.", param, ctx)

        extensions = filedict[self.filetype]
        accepted = tuple(extensions)
        if self.gzip_ok:
            accepted += tuple(ext + ".gz" for ext in extensions)
        # The comparison is case-insensitive; str.endswith accepts a tuple of suffixes.
        if not value.lower().endswith(accepted):
            message = f"{value} does not end with one of the expected extensions [" + ", ".join(extensions) + "]. Please verify this is the correct file type and rename the extension for compatibility."
            if self.gzip_ok:
                message += " Gzip compression (ending in .gz) is allowed."
            self.fail(message, param, ctx)
        return value

class SnakemakeParams(click.ParamType):
    """A class for a click type which accepts snakemake parameters. Does validations to make sure there isn't doubling up."""
    name = "snakemake_params"

    def convert(self, value, param, ctx):
        """Validate a string of extra Snakemake command-line arguments.

        Args:
            value: whitespace-separated Snakemake arguments, e.g.
                ``"--dry-run --jobs 4"``. Options may carry their value in the
                ``--option=value`` form.
            param: the click parameter being processed (forwarded to ``self.fail``).
            ctx: the click context (forwarded to ``self.fail``).

        Returns:
            ``value`` unchanged when every option is allowed.

        Tokens that do not start with ``-`` are treated as option values and
        are not checked. Forbidden or unrecognized options are reported
        through ``self.fail``.
        """
        # Options Harpy already sets itself when it calls Snakemake.
        forbidden = frozenset(
            "--rerun-incomplete --ri --show-failed-logs --rerun-triggers --nolock "
            "--software-deployment-method --smd --deployment --deployment-method "
            "--conda-prefix --cores -c --directory -d --snakefile -s --configfile "
            "--configfiles".split()
        )
        # Remaining Snakemake options a user may pass through.
        available = frozenset(
            (
                "--dry-run --dryrun -n --profile --cache --jobs -j --local-cores "
                "--resources --res --set-threads --max-threads --set-resources "
                "--set-scatter --set-resource-scopes --default-resources --default-res "
                "--preemptible-rules --preemptible-retries --envvars --touch -t "
                "--keep-going -k --force -f --executor -e --forceall -F --forcerun -R "
                "--prioritize -P --batch --until -U --omit-from -O --shadow-prefix "
                "--scheduler --wms-monitor --wms-monitor-arg --scheduler-ilp-solver "
                "--conda-base-path --no-subworkflows --nosw --precommand --groups "
                "--group-components --report --report-stylesheet --reporter "
                "--draft-notebook --edit-notebook --notebook-listen --lint "
                "--generate-unit-tests --containerize --export-cwl --list-rules --list -l "
                "--list-target-rules --lt --dag --rulegraph --filegraph --d3dag "
                "--summary -S --detailed-summary -D --archive --cleanup-metadata --cm "
                "--cleanup-shadow --skip-script-cleanup --unlock --list-changes --lc "
                "--list-input-changes --li --list-params-changes --lp --list-untracked "
                "--lu --delete-all-output --delete-temp-output --keep-incomplete "
                "--drop-metadata --version -v --printshellcmds -p --debug-dag --nocolor "
                "--quiet -q --print-compilation --verbose --force-use-threads "
                "--allow-ambiguity -a --ignore-incomplete --ii --max-inventory-time "
                "--latency-wait --output-wait -w --wait-for-files --wait-for-files-file "
                "--queue-input-wait-time --notemp --nt --all-temp --unneeded-temp-files "
                "--keep-storage-local-copies --target-files-omit-workdir-adjustment "
                "--allowed-rules --max-jobs-per-timespan --max-jobs-per-second "
                "--max-status-checks-per-second --seconds-between-status-checks "
                "--retries --restart-times -T --wrapper-prefix "
                "--default-storage-provider --default-storage-prefix "
                "--local-storage-prefix --remote-job-local-storage-prefix "
                "--shared-fs-usage --scheduler-greediness --greediness --no-hooks "
                "--debug --runtime-profile --local-groupid --attempt "
                "--log-handler-script --log-service --job-deploy-sources "
                "--benchmark-extended --container-image --immediate-submit --is "
                "--jobscript --js --jobname --jn --flux --container-cleanup-images "
                "--use-conda --conda-not-block-search-path-envvars --list-conda-envs "
                "--conda-cleanup-envs --conda-cleanup-pkgs --conda-create-envs-only "
                "--conda-frontend --use-apptainer --use-singularity --apptainer-prefix "
                "--singularity-prefix --apptainer-args --singularity-args "
                "--use-envmodules --scheduler-solver-path --deploy-sources "
                "--target-jobs --mode --report-html-path --report-html-stylesheet-path"
            ).split()
        )
        for token in value.split():
            if not token.startswith("-"):
                continue
            # Compare only the option name so "--jobs=4" is recognized as "--jobs".
            option = token.split("=", 1)[0]
            if option in forbidden:
                self.fail(f"{option} is a forbidden option because it is already used by Harpy to call Snakemake.", param, ctx)
            if option not in available:
                self.fail(f"{option} is not a valid Snakemake option. Run 'snakemake --help' for a list of all Snakemake command line options.", param, ctx)
        return value

class HPCProfile(click.ParamType):
    """Click type for a directory that holds a Snakemake HPC profile.

    Validation confirms that the directory is present and readable and that
    it contains a readable ``config.yaml``.
    """
    name = "hpc_profile"

    def convert(self, value, param, ctx):
        """Check the profile directory and hand ``value`` back untouched.

        Each failed check is reported through ``self.fail`` with ``param``
        and ``ctx`` forwarded.
        """
        # Checks on the directory itself.
        if not os.path.exists(value):
            self.fail(f"{value} does not exist. Please check the spelling and try again.", param, ctx)
        if not os.access(value, os.R_OK):
            self.fail(f"{value} is not readable. Please check file/directory permissions and try again", param, ctx)
        if os.path.isfile(value):
            self.fail(f"{value} is a file, but input should be a directory.", param, ctx)

        # Checks on the config file expected inside the directory.
        profile_config = f"{value}/config.yaml"
        if not os.path.exists(profile_config):
            self.fail(f"{value} does not contain the necessary config.yaml file.", param, ctx)
        if not os.access(profile_config, os.R_OK):
            self.fail(f"{value}/config.yaml does not have read access. Please check the file permissions and try again.", param, ctx)
        return value

### program-specific extra-params types
Loading

0 comments on commit 31c905d

Please sign in to comment.