From b67c70832579e22fc30744aefead320c953b7b16 Mon Sep 17 00:00:00 2001 From: pdimens Date: Fri, 28 Jun 2024 15:25:37 -0400 Subject: [PATCH 1/3] move pysam into main env --- .deprecated/align-minimap.smk | 16 ++--- .github/filters.yml | 19 ++--- src/harpy/align.py | 5 -- src/harpy/{scripts => bin}/assignMI.py | 71 ++++++++++--------- src/harpy/{scripts => bin}/bxStats.py | 33 ++++++++- src/harpy/{scripts => bin}/checkBAM.py | 35 +++++++-- src/harpy/{scripts => bin}/checkFASTQ.py | 34 +++++++-- src/harpy/{scripts => bin}/countBX.py | 44 +++++++++--- src/harpy/{scripts => bin}/prune_haplotype.py | 0 src/harpy/preflight.py | 2 - src/harpy/qc.py | 1 - src/harpy/snakefiles/align-bwa.smk | 16 ++--- src/harpy/snakefiles/align-ema.smk | 8 +-- src/harpy/snakefiles/align-strobealign.smk | 16 ++--- src/harpy/snakefiles/preflight-bam.smk | 8 +-- src/harpy/snakefiles/preflight-fastq.smk | 16 ++--- src/harpy/snakefiles/qc.smk | 8 +-- 17 files changed, 212 insertions(+), 120 deletions(-) rename src/harpy/{scripts => bin}/assignMI.py (80%) rename src/harpy/{scripts => bin}/bxStats.py (78%) rename src/harpy/{scripts => bin}/checkBAM.py (58%) rename src/harpy/{scripts => bin}/checkFASTQ.py (63%) rename src/harpy/{scripts => bin}/countBX.py (51%) rename src/harpy/{scripts => bin}/prune_haplotype.py (100%) diff --git a/.deprecated/align-minimap.smk b/.deprecated/align-minimap.smk index 055a3dc63..a91fcc789 100644 --- a/.deprecated/align-minimap.smk +++ b/.deprecated/align-minimap.smk @@ -234,12 +234,12 @@ rule assign_molecules: bai = outdir + "/{sample}.bam.bai" params: molecule_distance - conda: - f"{envdir}/qc.yaml" + container: + None message: "Assigning barcodes to molecules: {wildcards.sample}" - script: - "scripts/assignMI.py" + shell: + "assignMI.py -o {output.bam} -c {params} {input.bam}" rule alignment_bxstats: input: @@ -249,12 +249,12 @@ rule alignment_bxstats: outdir + "/reports/data/bxstats/{sample}.bxstats.gz" params: sample = lambda wc: d[wc.sample] - conda: - f"{envdir}/qc.yaml" + container: + None message: "Calculating barcode alignment statistics: {wildcards.sample}" - script: - "scripts/bxStats.py" + shell: + "bxStats.py -o {output} {input.bam}" rule alignment_coverage: input: diff --git a/.github/filters.yml b/.github/filters.yml index a232a00b6..92f30db03 100644 --- a/.github/filters.yml +++ b/.github/filters.yml @@ -18,8 +18,8 @@ preflight: &preflight - 'src/harpy/rules/preflight-bam.smk' - 'test/fastq/**' - 'test/bam/**' - - 'src/harpy/scripts/checkBAM.py' - - 'src/harpy/scripts/checkFASTQ.py' + - 'src/harpy/bin/checkBAM.py' + - 'src/harpy/bin/checkFASTQ.py' - 'src/harpy/reports/PreflightFastq.Rmd' - 'src/harpy/reports/PreflightBam.Rmd' demux: &demux @@ -43,8 +43,9 @@ bwa: &bwa - 'src/harpy/rules/align-bwa.smk' - 'src/harpy/reports/AlignStats.Rmd' - 'src/harpy/reports/BxCount.Rmd' - - 'src/harpy/scripts/bxStats.py' - - 'src/harpy/scripts/countBX.py' + - 'src/harpy/bin/bxStats.py' + - 'src/harpy/bin/countBX.py' + - `src/harpy/bin/assignMI.py' - 'src/harpy/bin/makeWindows.py' - 'test/fastq/**' ema: &ema @@ -53,10 +54,9 @@ ema: &ema - 'src/harpy/align.py' - 'src/harpy/rules/align-ema.smk' - 'src/harpy/reports/AlignStats.Rmd' - - 'src/harpy/reports/EmaCount.Rmd' - 'src/harpy/reports/BxCount.Rmd' - - 'src/harpy/scripts/bxStats.py' - - 'src/harpy/scripts/countBX.py' + - 'src/harpy/bin/bxStats.py' + - 'src/harpy/bin/countBX.py' - 'src/harpy/bin/makewindows.py' - 'test/fastq/**' strobealign: &strobealign @@ -66,8 +66,9 @@ strobealign: &strobealign - 'src/harpy/rules/align-strobealign.smk' - 'src/harpy/reports/AlignStats.Rmd' - 'src/harpy/reports/BxCount.Rmd' - - 'src/harpy/scripts/bxStats.py' - - 'src/harpy/scripts/countBX.py' + - `src/harpy/bin/assignMI.py' + - 'src/harpy/bin/bxStats.py' + - 'src/harpy/bin/countBX.py' - 'src/harpy/bin/makewindows.py' - 'test/fastq/**' mpileup: &mpileup diff --git a/src/harpy/align.py b/src/harpy/align.py index a0e03f29c..4342161d6 100644 --- a/src/harpy/align.py +++ b/src/harpy/align.py @@ -116,8 +116,6 @@ def bwa(inputs, output_dir, genome, depth_window, threads, extra_params, quality fqlist, sample_count = parse_fastq_inputs(inputs) validate_input_by_ext(genome, "--genome", [".fasta", ".fa", ".fasta.gz", ".fa.gz"]) fetch_rule(workflowdir, "align-bwa.smk") - fetch_script(workflowdir, "assignMI.py") - fetch_script(workflowdir, "bxStats.py") fetch_report(workflowdir, "AlignStats.Rmd") fetch_report(workflowdir, "AlignBxStats.Rmd") @@ -208,7 +206,6 @@ def ema(inputs, output_dir, platform, whitelist, genome, depth_window, threads, fqlist, sample_count = parse_fastq_inputs(inputs) validate_input_by_ext(genome, "--genome", [".fasta", ".fa", ".fasta.gz", ".fa.gz"]) fetch_rule(workflowdir, "align-ema.smk") - fetch_script(workflowdir, "bxStats.py") fetch_report(workflowdir, "AlignStats.Rmd") fetch_report(workflowdir, "AlignBxStats.Rmd") @@ -289,8 +286,6 @@ def strobe(inputs, output_dir, genome, read_length, depth_window, threads, extra fqlist, sample_count = parse_fastq_inputs(inputs) validate_input_by_ext(genome, "--genome", [".fasta", ".fa", ".fasta.gz", ".fa.gz"]) fetch_rule(workflowdir, "align-strobealign.smk") - fetch_script(workflowdir, "assignMI.py") - fetch_script(workflowdir, "bxStats.py") fetch_report(workflowdir, "AlignStats.Rmd") fetch_report(workflowdir, "AlignBxStats.Rmd") diff --git a/src/harpy/scripts/assignMI.py b/src/harpy/bin/assignMI.py similarity index 80% rename from src/harpy/scripts/assignMI.py rename to src/harpy/bin/assignMI.py index 10f85ee35..f8ab52809 100755 --- a/src/harpy/scripts/assignMI.py +++ b/src/harpy/bin/assignMI.py @@ -1,29 +1,34 @@ +#! /usr/bin/env python + import re import os import sys +import argparse import pysam -#import argparse - -#parser = argparse.ArgumentParser( -# prog = 'assignMI.py', -# description = -# """ -# Assign an MI:i: (Molecular Identifier) tag to each barcoded -# record based on a molecular distance cutoff. Unmapped records -# are discarded in the output. Records without a BX:Z: tag or -# with an invalid barcode (00 as one of its segments) are presevered -# but are not assigned an MI:i tag. Input file MUST BE COORDINATE SORTED. -# """, -# usage = "assignMI.py -c cutoff -i input.bam -o output.bam", -# exit_on_error = False -# ) -#parser.add_argument('-c','--cutoff', type=int, default = 100000, help = "Distance in base pairs at which alignments with the same barcode should be considered different molecules. (default: 100000)") -#parser.add_argument('-i', '--input', help = "Input coordinate-sorted bam/sam file. If bam, a matching index file should be in the same directory.") -#parser.add_argument('-o', '--output', help = "Output bam file. Will also create an index file.") -# -#if len(sys.argv) == 1: -# parser.print_help(sys.stderr) -# sys.exit(1) + +parser = argparse.ArgumentParser( + prog = 'assignMI.py', + description = + """ + Assign an MI:i: (Molecular Identifier) tag to each barcoded + record based on a molecular distance cutoff. Unmapped records + are discarded in the output. Records without a BX:Z: tag or + with an invalid barcode (00 as one of its segments) are presevered + but are not assigned an MI:i tag. Input file MUST BE COORDINATE SORTED. + """, + usage = "assignMI.py -c cutoff -o output.bam input.bam", + exit_on_error = False + ) + +parser.add_argument('-c','--cutoff', type=int, default = 100000, help = "Distance in base pairs at which alignments with the same barcode should be considered different molecules. (default: 100000)") +parser.add_argument('-o', '--output', help = "Output bam file. Will also create an index file.") +parser.add_argument('input', help = "Input coordinate-sorted bam/sam file. If bam, a matching index file should be in the same directory.") + +if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + +args = parser.parse_args() def write_validbx(bam, alnrecord, molID): ''' @@ -97,7 +102,7 @@ def write_missingbx(bam, alnrecord): bam.write(alnrecord) #args = parser.parse_args() -bam_input = snakemake.input[0] +bam_input = args.input # initialize the dict d = dict() # chromlast keeps track of the last chromosome so we can @@ -117,17 +122,15 @@ def write_missingbx(bam, alnrecord): alnfile = pysam.AlignmentFile(bam_input) # iniitalize output file -#alnfile = pysam.AlignmentFile("/home/pdimens/Documents/harpy/test/bam/sample1.bam") -outfile = pysam.AlignmentFile(snakemake.output[0], "wb", template = alnfile) -#outfile = pysam.AlignmentFile("/home/pdimens/Documents/harpy/test/bam/test.bam", "w", template = alnfile) +outfile = pysam.AlignmentFile(args.output, "wb", template = alnfile) for record in alnfile.fetch(): chrm = record.reference_name bp = record.query_alignment_length # check if the current chromosome is different from the previous one # if so, empty the dict (a consideration for RAM usage) - if chromlast != False and chrm != chromlast: - d = dict() + if chromlast is not False and chrm != chromlast: + d = {} if record.is_unmapped: # skip, don't output chromlast = chrm @@ -146,7 +149,7 @@ def write_missingbx(bam, alnrecord): write_missingbx(outfile, record) chromlast = chrm continue - + aln = record.get_blocks() if not aln: # unaligned, skip and don't output @@ -160,9 +163,9 @@ def write_missingbx(bam, alnrecord): pos_end = aln[-1][1] # create bx entry if it's not present - if bx not in d.keys(): + if bx not in d: # increment MI b/c it's a new molecule - MI += 1 + MI += 1 d[bx] = { "lastpos" : pos_end, "current_suffix": 0, @@ -184,9 +187,9 @@ def write_missingbx(bam, alnrecord): # if the distance between alignments is > cutoff, it's a different molecule # so we'll +1 the suffix of the original barcode and relabel this one as # BX + suffix. Since it's a new entry, we initialize it and move on - if dist > snakemake.params[0]: + if dist > args.cutoff: # increment MI b/c it's a new molecule - MI += 1 + MI += 1 # increment original barcode's suffix d[orig]["current_suffix"] += 1 bx = orig + "." + str(d[orig]["current_suffix"]) @@ -216,4 +219,4 @@ def write_missingbx(bam, alnrecord): outfile.close() # index the output file -pysam.index(snakemake.output[0]) \ No newline at end of file +pysam.index(args.output) \ No newline at end of file diff --git a/src/harpy/scripts/bxStats.py b/src/harpy/bin/bxStats.py similarity index 78% rename from src/harpy/scripts/bxStats.py rename to src/harpy/bin/bxStats.py index fae907bf8..5cf2c4c45 100755 --- a/src/harpy/scripts/bxStats.py +++ b/src/harpy/bin/bxStats.py @@ -1,9 +1,38 @@ +#! /usr/bin/env python + import re +import sys import gzip +import argparse import pysam -alnfile = pysam.AlignmentFile(snakemake.input[0]) -outfile = gzip.open(snakemake.output[0], "wb", 6) +parser = argparse.ArgumentParser( + prog = 'bxStats.py', + description = + """ + Calculates various linked-read molecule metrics from the input alignment file. + Metrics include (per molecule): number of reads, position start, position end, + length of molecule inferred from alignments, total aligned basepairs, total, + length of inferred inserts, molecule coverage (%) based on aligned bases, molecule + coverage (%) based on total inferred insert length. + Input file MUST BE COORDINATE SORTED. + """, + usage = "bxStats.py -o output.gz input.bam", + exit_on_error = False + ) + +parser.add_argument('-o', '--output', help = "Gzipped tab-delimited file of metrics.") +parser.add_argument('input', help = "Input coordinate-sorted bam/sam file. If bam, a matching index file should be in the same directory.") + +if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + +args = parser.parse_args() + + +alnfile = pysam.AlignmentFile(args.input) +outfile = gzip.open(args.output, "wb", 6) outfile.write(b"contig\tmolecule\treads\tstart\tend\tlength_inferred\taligned_bp\tinsert_len\tcoverage_bp\tcoverage_inserts\n") d = {} diff --git a/src/harpy/scripts/checkBAM.py b/src/harpy/bin/checkBAM.py similarity index 58% rename from src/harpy/scripts/checkBAM.py rename to src/harpy/bin/checkBAM.py index e009e6746..d6844f423 100755 --- a/src/harpy/scripts/checkBAM.py +++ b/src/harpy/bin/checkBAM.py @@ -1,9 +1,33 @@ -import pysam -import sys +#! /usr/bin/env python + import re -import os.path +import sys +import os +import argparse +import pysam + +parser = argparse.ArgumentParser( + prog = 'checkBAM.py', + description = + """ + Parses an aligment (sam/bam) file to check if the sample name + matched the RG tag, whether BX:Z: is the last tag in the record, + and the counts of: total alignments, alignments with an MI:i: tag, + alignments without BX:Z: tag, incorrect BX:Z: tag. + """, + usage = "checkBAM.py input.bam > output.txt", + exit_on_error = False + ) + +parser.add_argument('input', help = "Input bam/sam file. If bam, a matching index file should be in the same directory.") + +if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + +args = parser.parse_args() -bam_in = snakemake.input[0] +bam_in = args.input # regex for EXACTLY AXXCXXBXXDXX haplotag = re.compile('^A[0-9][0-9]C[0-9][0-9]B[0-9][0-9]D[0-9][0-9]$') @@ -46,5 +70,4 @@ values = [str(i) for i in [os.path.basename(bam_in), nameMismatch, n_reads, noMI, noBX, badBX, bxNotLast]] -with open(snakemake.output[0], "w") as fout: - print("\t".join(values), file = fout) \ No newline at end of file +print("\t".join(values), file = sys.stdout) diff --git a/src/harpy/scripts/checkFASTQ.py b/src/harpy/bin/checkFASTQ.py similarity index 63% rename from src/harpy/scripts/checkFASTQ.py rename to src/harpy/bin/checkFASTQ.py index 01666fa09..ff36f1ae3 100755 --- a/src/harpy/scripts/checkFASTQ.py +++ b/src/harpy/bin/checkFASTQ.py @@ -1,9 +1,32 @@ -import pysam -import sys +#! /usr/bin/env python + import re -import os.path +import os +import sys +import argparse +import pysam + +parser = argparse.ArgumentParser( + prog = 'checkFASTQ.py', + description = + """ + Parses a FASTQ file to check if any sequences don't conform to the SAM spec, + whether BX:Z: is the last tag in the record, and the counts of: total reads, + reads without BX:Z: tag, reads with incorrect BX:Z: tag. + """, + usage = "checkBAM.py input.bam > output.txt", + exit_on_error = False + ) + +parser.add_argument('input', help = "Input fastq file. Can be gzipped.") + +if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + +args = parser.parse_args() -fq_in = snakemake.input[0] +fq_in = args.input #bxz = re.compile('BX:Z:') samspec = re.compile('[A-Z][A-Z]:[AifZHB]:') @@ -38,5 +61,4 @@ noBX += 1 values = [str(i) for i in [os.path.basename(fq_in), n_reads, noBX, badBX, badSamSpec, bxNotLast]] -with open(snakemake.output[0], "w") as fout: - print("\t".join(values), file = fout) \ No newline at end of file +print("\t".join(values), file = sys.stdout) diff --git a/src/harpy/scripts/countBX.py b/src/harpy/bin/countBX.py similarity index 51% rename from src/harpy/scripts/countBX.py rename to src/harpy/bin/countBX.py index 09132fae6..6b47aa279 100755 --- a/src/harpy/scripts/countBX.py +++ b/src/harpy/bin/countBX.py @@ -1,6 +1,29 @@ -import pysam +#! /usr/bin/env python + import re import sys +import argparse +import pysam + +parser = argparse.ArgumentParser( + prog = 'countBX.py', + description = + """ + Parses a FASTQ file to count: total sequences, total number of BX tags, + number of valid haplotagging BX tags, number of invalid BX tags, number of + invalid BX tag segments (i.e. A00, C00, B00, D00) + """, + usage = "countBX.py input.fastq > output.txt", + exit_on_error = False + ) + +parser.add_argument('input', help = "Input fastq file. Can be gzipped.") + +if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + +args = parser.parse_args() n_reads = 0 n_bx = 0 @@ -16,7 +39,7 @@ "C" : 0, "D" : 0 } -with pysam.FastxFile(snakemake.input[0]) as fh: +with pysam.FastxFile(args.input) as fh: for entry in fh: n_reads += 1 comments = entry.comment.split() @@ -38,12 +61,11 @@ continue n_valid += 1 -with open(snakemake.output[0], "w") as fout: - print(f"totalReads\t{n_reads}", file = fout) - print(f"bxTagCount\t{n_bx}", file = fout) - print(f"bxValid\t{n_valid}", file = fout) - print(f"bxInvalid\t{n_bx - n_valid}", file = fout) - print("A00\t",str(inv_dict["A"]), file = fout) - print("C00\t",str(inv_dict["C"]), file = fout) - print("B00\t",str(inv_dict["B"]), file = fout) - print("D00\t",str(inv_dict["D"]), file = fout) \ No newline at end of file +print(f"totalReads\t{n_reads}", file = sys.stdout) +print(f"bxTagCount\t{n_bx}", file = sys.stdout) +print(f"bxValid\t{n_valid}", file = sys.stdout) +print(f"bxInvalid\t{n_bx - n_valid}", file = sys.stdout) +print("A00\t",str(inv_dict["A"]), file = sys.stdout) +print("C00\t",str(inv_dict["C"]), file = sys.stdout) +print("B00\t",str(inv_dict["B"]), file = sys.stdout) +print("D00\t",str(inv_dict["D"]), file = sys.stdout) diff --git a/src/harpy/scripts/prune_haplotype.py b/src/harpy/bin/prune_haplotype.py similarity index 100% rename from src/harpy/scripts/prune_haplotype.py rename to src/harpy/bin/prune_haplotype.py diff --git a/src/harpy/preflight.py b/src/harpy/preflight.py index 07e6bf9cd..5b538b686 100755 --- a/src/harpy/preflight.py +++ b/src/harpy/preflight.py @@ -76,7 +76,6 @@ def fastq(inputs, output_dir, threads, snakemake, quiet, hpc, conda, print_only) os.makedirs(f"{workflowdir}/", exist_ok= True) fqlist, n = parse_fastq_inputs(inputs) fetch_rule(workflowdir, "preflight-fastq.smk") - fetch_script(workflowdir, "checkFASTQ.py") fetch_report(workflowdir, "PreflightFastq.Rmd") with open(f"{workflowdir}/config.yaml", "w", encoding="utf-8") as config: @@ -136,7 +135,6 @@ def bam(inputs, output_dir, threads, snakemake, quiet, hpc, conda, print_only): bamlist, n = parse_alignment_inputs(inputs) fetch_rule(workflowdir, "preflight-bam.smk") fetch_report(workflowdir, "PreflightBam.Rmd") - fetch_script(workflowdir, "checkBAM.py") with open(f"{workflowdir}/config.yaml", "w", encoding="utf-8") as config: config.write("workflow: preflight bam\n") diff --git a/src/harpy/qc.py b/src/harpy/qc.py index 120fedf9f..477b0c31e 100644 --- a/src/harpy/qc.py +++ b/src/harpy/qc.py @@ -67,7 +67,6 @@ def qc(inputs, output_dir, min_length, max_length, ignore_adapters, extra_params os.makedirs(workflowdir, exist_ok=True) fqlist, sample_count = parse_fastq_inputs(inputs) - fetch_script(workflowdir, "countBX.py") fetch_rule(workflowdir, "qc.smk") fetch_report(workflowdir, "BxCount.Rmd") diff --git a/src/harpy/snakefiles/align-bwa.smk b/src/harpy/snakefiles/align-bwa.smk index 3c3617a98..f83acfbf7 100644 --- a/src/harpy/snakefiles/align-bwa.smk +++ b/src/harpy/snakefiles/align-bwa.smk @@ -186,12 +186,12 @@ rule assign_molecules: bai = outdir + "/{sample}.bam.bai" params: molecule_distance - conda: - f"{envdir}/qc.yaml" + container: + None message: "Assigning barcodes to molecules: {wildcards.sample}" - script: - "scripts/assignMI.py" + shell: + "assignMI.py -o {output.bam} -c {params} {input.bam}" rule bxstats: input: @@ -201,12 +201,12 @@ rule bxstats: outdir + "/reports/data/bxstats/{sample}.bxstats.gz" params: sample = lambda wc: d[wc.sample] - conda: - f"{envdir}/qc.yaml" + container: + None message: "Calculating barcoded alignment statistics: {wildcards.sample}" - script: - "scripts/bxStats.py" + shell: + "bxStats.py -o {output} {input.bam}" rule coverage: input: diff --git a/src/harpy/snakefiles/align-ema.smk b/src/harpy/snakefiles/align-ema.smk index ee410dd5c..f95182abf 100644 --- a/src/harpy/snakefiles/align-ema.smk +++ b/src/harpy/snakefiles/align-ema.smk @@ -306,12 +306,12 @@ rule bx_stats: bai = outdir + "/{sample}.bam.bai" output: outdir + "/reports/data/bxstats/{sample}.bxstats.gz" - conda: - f"{envdir}/qc.yaml" + container: + None message: "Calculating barcode alignment statistics: {wildcards.sample}" - script: - "scripts/bxStats.py" + shell: + "bxStats.py -o {output} {input.bam}" rule report_persample: input: diff --git a/src/harpy/snakefiles/align-strobealign.smk b/src/harpy/snakefiles/align-strobealign.smk index 6a5bbc331..9d5e45564 100644 --- a/src/harpy/snakefiles/align-strobealign.smk +++ b/src/harpy/snakefiles/align-strobealign.smk @@ -185,12 +185,12 @@ rule assign_molecules: bai = outdir + "/{sample}.bam.bai" params: molecule_distance - conda: - f"{envdir}/qc.yaml" + container: + None message: "Assigning barcodes to molecules: {wildcards.sample}" - script: - "scripts/assignMI.py" + shell: + "assignMI.py -o {output.bam} -c {params} {input.bam}" rule bxstats: input: @@ -200,12 +200,12 @@ rule bxstats: outdir + "/reports/data/bxstats/{sample}.bxstats.gz" params: sample = lambda wc: d[wc.sample] - conda: - f"{envdir}/qc.yaml" + container: + None message: "Calculating barcode alignment statistics: {wildcards.sample}" - script: - "scripts/bxStats.py" + shell: + "bxStats.py -o {output} {input.bam}" rule coverage: input: diff --git a/src/harpy/snakefiles/preflight-bam.smk b/src/harpy/snakefiles/preflight-bam.smk index 64f8979c4..e3a7decd7 100644 --- a/src/harpy/snakefiles/preflight-bam.smk +++ b/src/harpy/snakefiles/preflight-bam.smk @@ -78,12 +78,12 @@ rule check_bam: bai = get_align_index output: temp(out_dir + "/{sample}.log") - conda: - f"{envdir}/qc.yaml" + container: + None message: "Processing: {wildcards.sample}" - script: - "scripts/checkBAM.py" + shell: + "checkBAM.py {input.bam} > {output}" rule merge_checks: input: diff --git a/src/harpy/snakefiles/preflight-fastq.smk b/src/harpy/snakefiles/preflight-fastq.smk index 84fcacb7a..6142fe1cb 100644 --- a/src/harpy/snakefiles/preflight-fastq.smk +++ b/src/harpy/snakefiles/preflight-fastq.smk @@ -58,12 +58,12 @@ rule check_forward: get_fq1 output: temp(out_dir + "/{sample}.F.log") - conda: - f"{envdir}/qc.yaml" + container: + None message: "Processing forward reads: {wildcards.sample}" - script: - "scripts/checkFASTQ.py" + shell: + "checkFASTQ.py {input} > {output}" rule check_reverse: input: @@ -72,10 +72,10 @@ rule check_reverse: temp(out_dir + "/{sample}.R.log") message: "Processing reverse reads: {wildcards.sample}" - conda: - f"{envdir}/qc.yaml" - script: - "scripts/checkFASTQ.py" + container: + None + shell: + "checkFASTQ.py {input} > {output}" rule merge_checks: input: diff --git a/src/harpy/snakefiles/qc.smk b/src/harpy/snakefiles/qc.smk index 3621ebee9..16598ea3c 100644 --- a/src/harpy/snakefiles/qc.smk +++ b/src/harpy/snakefiles/qc.smk @@ -89,10 +89,10 @@ rule count_beadtags: temp(outdir + "/logs/bxcount/{sample}.count.log") message: "Counting barcode frequency: {wildcards.sample}" - conda: - f"{envdir}/qc.yaml" - script: - "scripts/countBX.py" + container: + None + shell: + "countBX.py {input} > {output}" rule beadtag_counts_summary: input: From 7a5b83e1458f72561fa8f90ebb757484fbbdd736 Mon Sep 17 00:00:00 2001 From: pdimens Date: Fri, 28 Jun 2024 15:39:43 -0400 Subject: [PATCH 2/3] fix --- .github/filters.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/filters.yml b/.github/filters.yml index 92f30db03..3cc4233f4 100644 --- a/.github/filters.yml +++ b/.github/filters.yml @@ -45,7 +45,7 @@ bwa: &bwa - 'src/harpy/reports/BxCount.Rmd' - 'src/harpy/bin/bxStats.py' - 'src/harpy/bin/countBX.py' - - `src/harpy/bin/assignMI.py' + - 'src/harpy/bin/assignMI.py' - 'src/harpy/bin/makeWindows.py' - 'test/fastq/**' ema: &ema @@ -66,7 +66,7 @@ strobealign: &strobealign - 'src/harpy/rules/align-strobealign.smk' - 'src/harpy/reports/AlignStats.Rmd' - 'src/harpy/reports/BxCount.Rmd' - - `src/harpy/bin/assignMI.py' + - 'src/harpy/bin/assignMI.py' - 'src/harpy/bin/bxStats.py' - 'src/harpy/bin/countBX.py' - 'src/harpy/bin/makewindows.py' From 503825e0defcbf8936f2ebf17adb014e0b5aa60d Mon Sep 17 00:00:00 2001 From: pdimens Date: Fri, 28 Jun 2024 15:56:56 -0400 Subject: [PATCH 3/3] add missing pysam --- resources/harpy.yaml | 1 + resources/meta.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/resources/harpy.yaml b/resources/harpy.yaml index d3fc6ae47..129fe6d02 100644 --- a/resources/harpy.yaml +++ b/resources/harpy.yaml @@ -7,6 +7,7 @@ dependencies: - bcftools =1.20 - mamba - pandas + - pysam - python - rich-click - snakemake-minimal >7 diff --git a/resources/meta.yaml b/resources/meta.yaml index e002472f1..88ccdfb59 100644 --- a/resources/meta.yaml +++ b/resources/meta.yaml @@ -34,6 +34,7 @@ requirements: - apptainer - bcftools =1.20 - pandas + - pysam - python >3.10 - rich-click - snakemake-minimal >7