diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ac6d9a68..36289e56 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: # log_file: mypy.log - repo: https://github.com/pycqa/flake8 - rev: 7.1.1 + rev: 7.1.2 hooks: - id: flake8 args: [ diff --git a/amira/__init__.py b/amira/__init__.py index 364e7bae..7bbb2ef5 100644 --- a/amira/__init__.py +++ b/amira/__init__.py @@ -1 +1 @@ -__version__ = "0.6.4" +__version__ = "0.6.5" diff --git a/amira/__main__.py b/amira/__main__.py index d8fb0ec0..6e25ef86 100644 --- a/amira/__main__.py +++ b/amira/__main__.py @@ -2,6 +2,7 @@ import json import os import random +import re import sys import time @@ -24,7 +25,7 @@ process_reference_alleles, run_pandora_map, ) -from amira.read_utils import parse_fastq, plot_read_length_distribution +from amira.read_utils import parse_fastq, plot_read_length_distribution, write_fastq from amira.result_utils import ( estimate_copy_numbers, filter_results, @@ -260,15 +261,16 @@ def main() -> None: if not args.quiet: sys.stderr.write("\nAmira: loading FASTQ file.\n") fastq_content = parse_fastq(args.reads) + # remove non-word characters from read names (\W does not match underscores, so they are kept) + fastq_content = {re.sub(r"\W+", "", r): fastq_content[r] for r in fastq_content} + # write the modified fastq data to the output directory + read_fastq_path = os.path.join(args.output_dir, os.path.basename(args.reads)) + write_fastq( + read_fastq_path, + fastq_content, + ) # run pandora if args.pandoraSam is None and args.pandoraJSON is None: - # if args.sample_reads: - # # randomly sample 100,000 reads - # read_fastq_path = downsample_reads( - # fastq_content, args.reads, args.output_dir, args.sample_size - # ) - # else: - read_fastq_path = args.reads if not args.quiet: sys.stderr.write("\nAmira: running Pandora map.\n") pandoraSam, pandoraConsensus = run_pandora_map( @@ -282,9 +284,6 @@ def main() -> None: else: pandoraSam = args.pandoraSam pandoraConsensus = args.pandoraConsensus - read_fastq_path = 
args.reads - # remove underscores in read names - fastq_content = {r.replace("_", ""): fastq_content[r] for r in fastq_content} # import a JSON of genes on reads if args.pandoraJSON: if not args.quiet: @@ -564,7 +563,7 @@ def main() -> None: copy_numbers = estimate_copy_numbers( mean_read_depth, os.path.join(args.output_dir, "AMR_allele_fastqs", "longest_reads.fasta"), - args.reads, + read_fastq_path, args.cores, args.samtools_path, ) diff --git a/amira/pre_processing.py b/amira/pre_processing.py index 9f71be2c..46af5566 100644 --- a/amira/pre_processing.py +++ b/amira/pre_processing.py @@ -12,8 +12,9 @@ def run_pandora_map(pandora_path, panRG_path, readfile, outdir, cores, seed): command = f"{pandora_path} map -t {cores} --min-gene-coverage-proportion 0.5 --max-covg 10000 " command += ( - f"-o {os.path.join(outdir, 'pandora_output')} {panRG_path} {readfile} --rng-seed {seed}" + f"-o {os.path.join(outdir, 'pandora_output')} {panRG_path} {readfile} --rng-seed {seed} " ) + command += "--min-abs-gene-coverage 1" # check that the panRG file exists if not os.path.exists(panRG_path): sys.stderr.write("\nAmira: panRG file does not exist.\n") diff --git a/pyproject.toml b/pyproject.toml index 15f6c9a4..34e36694 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "amira-amr" -version = "0.6.4" +version = "0.6.5" description = "Amira" authors = ["Daniel Anderson "] license = "Apache-2.0"