Skip to content

Commit

Permalink
add adapter specification (#175)
Browse files Browse the repository at this point in the history
* add adapter specification

* more robust validation
  • Loading branch information
pdimens authored Dec 11, 2024
1 parent 4462cb0 commit 2e8c1b0
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ jobs:
run: harpy qc -x "--low_complexity_filter" --quiet test/fastq
- name: harpy qc all options
shell: micromamba-shell {0}
run: harpy qc -a -d -c 21,40,3,0 --quiet test/fastq
run: harpy qc -a auto -d -c 21,40,3,0 --quiet test/fastq
deconvolve:
needs: [changes, pkgbuild]
if: ${{ needs.changes.outputs.deconvolve == 'true' && needs.pkgbuild.result == 'success' }}
Expand Down
6 changes: 5 additions & 1 deletion harpy/_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,11 @@ def validate_regions(regioninput, genome):
def check_fasta(genofile):
"""perform validations on fasta file for extensions and file contents"""
# validate fasta file contents
opener = gzip.open if is_gzip(genofile) else open
try:
opener = gzip.open if is_gzip(genofile) else open
except:
print_error("incorrect file format", f"The file must be plain-text or b/gzipped, but failed to be recognized as either. Please check that [blue]{genofile}[/blue] is indeed a fasta file.")
sys.exit(1)
mode = "rt" if is_gzip(genofile) else "r"
line_num = 0
seq_id = 0
Expand Down
19 changes: 15 additions & 4 deletions harpy/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ._cli_types_generic import HPCProfile, IntList, SnakemakeParams
from ._cli_types_params import FastpParams
from ._parsers import parse_fastq_inputs
from ._validations import check_fasta

docstring = {
"harpy qc": [
Expand All @@ -34,7 +35,7 @@
@click.option('-n', '--min-length', default = 30, show_default = True, type=int, help = 'Discard reads shorter than this length')
@click.option('-o', '--output-dir', type = click.Path(exists = False), default = "QC", show_default=True, help = 'Output directory name')
@click.option('-t', '--threads', default = 4, show_default = True, type = click.IntRange(min = 4, max_open = True), help = 'Number of threads to use')
@click.option('-a', '--trim-adapters', is_flag = True, default = False, help = 'Detect and trim adapters')
@click.option('-a', '--trim-adapters', type = str, help = 'Detect and trim adapters')
@click.option('--conda', is_flag = True, default = False, help = 'Use conda/mamba instead of a container')
@click.option('--setup-only', is_flag = True, hidden = True, show_default = True, default = False, help = 'Setup the workflow and exit')
@click.option('--hpc', type = HPCProfile(), help = 'Directory with HPC submission `config.yaml` file')
Expand All @@ -49,11 +50,13 @@ def qc(inputs, output_dir, min_length, max_length, trim_adapters, deduplicate, d
Provide the input fastq files and/or directories at the end of the command
as individual files/folders, using shell wildcards (e.g. `data/acronotus*.fq`), or both.
The input reads will be quality trimmed using:
The input reads can be quality trimmed using:
- a sliding window from front to tail
- poly-G tail removal
- `-a` automatically detects and remove adapters
- `-d` finds and remove PCR duplicates
- `-a` remove adapters
- accepts `auto` to automatically detect adapters
- accepts a FASTA file of adapters to remove
- `-d` finds and removes optical PCR duplicates
- `-c` resolves barcodes clashing between unrelated sequences
- off by default, activated with [4 integers](https://github.com/RolandFaure/QuickDeconvolution?tab=readme-ov-file#usage), separated by commas
- use `21,40,3,0` for QuickDeconvolution defaults (or adjust as needed)
Expand All @@ -71,6 +74,14 @@ def qc(inputs, output_dir, min_length, max_length, trim_adapters, deduplicate, d
command += snakemake

fqlist, sample_count = parse_fastq_inputs(inputs)
if trim_adapters:
if trim_adapters != "auto":
if not os.path.exists(trim_adapters):
raise click.BadParameter(f"--trim-adapters was given {trim_adapters}, but that file does not exist. Please check the spelling or verify the location of the file.")
if not os.access(trim_adapters, os.R_OK):
raise click.BadParameter(f"--trim-adapters was given {trim_adapters}, but that file does not have read permissions. Please modify the persmissions of the file to grant read access.")
check_fasta(trim_adapters)

os.makedirs(workflowdir, exist_ok=True)
fetch_rule(workflowdir, "qc.smk")
fetch_report(workflowdir, "bx_count.Rmd")
Expand Down
12 changes: 8 additions & 4 deletions harpy/snakefiles/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ envdir = os.path.join(os.getcwd(), outdir, "workflow", "envs")
min_len = config["min_len"]
max_len = config["max_len"]
extra = config.get("extra", "")
trimadapters = config["trim_adapters"]
trim_adapters = config.get("trim_adapters", None)
dedup = config["deduplicate"]
deconvolve = config.get("deconvolve", False)
if deconvolve:
Expand All @@ -30,6 +30,10 @@ if deconvolve:
skip_reports = config["reports"]["skip"]
bn_r = r"([_\.][12]|[_\.][FR]|[_\.]R[12](?:\_00[0-9])*)?\.((fastq|fq)(\.gz)?)$"
samplenames = {re.sub(bn_r, "", os.path.basename(i), flags = re.IGNORECASE) for i in fqlist}
if trim_adapters:
trim_arg = "--detect_adapter_for_pe" if trim_adapters == "auto" else f"--adapter_fasta {trim_adapters}"
else:
trim_arg = "--disable_adapter_trimming"

def get_fq1(wildcards):
# returns a list of fastq files for read 1 based on *wildcards.sample* e.g.
Expand All @@ -56,7 +60,7 @@ if not deconvolve:
params:
minlen = f"--length_required {min_len}",
maxlen = f"--max_len1 {max_len}",
trim_adapters = "--detect_adapter_for_pe" if trimadapters else "--disable_adapter_trimming" ,
trim_adapters = trim_arg,
dedup = "-D" if dedup else "",
extra = extra
threads:
Expand All @@ -81,7 +85,7 @@ else:
params:
minlen = f"--length_required {min_len}",
maxlen = f"--max_len1 {max_len}",
trim_adapters = "--detect_adapter_for_pe" if trimadapters else "--disable_adapter_trimming" ,
trim_adapters = trim_arg,
dedup = "-D" if dedup else "",
extra = extra
threads:
Expand Down Expand Up @@ -177,7 +181,7 @@ rule workflow_summary:
params:
minlen = f"--length_required {min_len}",
maxlen = f"--max_len1 {max_len}",
trim_adapters = "--detect_adapter_for_pe" if trimadapters else "--disable_adapter_trimming" ,
trim_adapters = trim_arg,
dedup = "-D" if dedup else "",
extra = extra
run:
Expand Down

0 comments on commit 2e8c1b0

Please sign in to comment.