From 9ab82db528fffd6412f5ea098da90f3ae7d85773 Mon Sep 17 00:00:00 2001 From: Jody Phelan Date: Thu, 21 Nov 2024 19:41:53 +0100 Subject: [PATCH] modify stdev --- pathogenprofiler/barcode.py | 8 ++++---- pathogenprofiler/profiler.py | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pathogenprofiler/barcode.py b/pathogenprofiler/barcode.py index 04dba22..aa5e388 100644 --- a/pathogenprofiler/barcode.py +++ b/pathogenprofiler/barcode.py @@ -57,7 +57,7 @@ def get_barcoding_mutations(mutations: dict, barcode_bed: str) -> tuple[dict, Li return (barcode_support,snps_report) -def barcode(mutations,barcode_bed: str,snps_file=None) -> List[BarcodeResult]: +def barcode(mutations,barcode_bed: str,snps_file=None,stdev_cutoff=0.15) -> List[BarcodeResult]: bed_num_col = len(open(barcode_bed).readline().rstrip().split("\t")) # bed = [] lineage_info = {} @@ -77,7 +77,7 @@ def barcode(mutations,barcode_bed: str,snps_file=None) -> List[BarcodeResult]: for l in barcode_support: logging.debug("Processing %s" % l) logging.debug(barcode_support[l]) - # If stdev of fraction across all barcoding positions > 0.15 + # If stdev of fraction across all barcoding positions > stdev_cutoff # Only look at positions with >5 reads tmp_allelic_dp = [x[1]/(x[0]+x[1]) for x in barcode_support[l] if sum(x)>5] logging.debug(tmp_allelic_dp) @@ -89,8 +89,8 @@ def barcode(mutations,barcode_bed: str,snps_file=None) -> List[BarcodeResult]: if len(tmp_allelic_dp)==0: logging.debug("No SNPs found for %s" % l) continue - if stdev(tmp_allelic_dp)>0.15: - logging.debug("Stdev > 0.15 for %s" % l) + if stdev(tmp_allelic_dp)>stdev_cutoff: + logging.debug(f"Stdev {stdev(tmp_allelic_dp)} > {stdev_cutoff} for {l}") continue # if number of barcoding positions > 5 and only one shows alternate diff --git a/pathogenprofiler/profiler.py b/pathogenprofiler/profiler.py index 7dc7627..7e36f70 100644 --- a/pathogenprofiler/profiler.py +++ b/pathogenprofiler/profiler.py @@ -17,8 +17,9 @@ def bam_barcoder(args: argparse.Namespace) -> List[BarcodeResult]: bam = Bam(args.bam, args.files_prefix, platform=args.platform, threads=args.threads) if not hasattr(args,'barcode_snps'): args.barcode_snps = None - barcode_mutations = bam.get_bed_gt(conf["barcode"],conf["ref"], caller=args.caller,platform=args.platform) - barcode_assignment = barcode(barcode_mutations,conf["barcode"],args.barcode_snps) + barcode_mutations = bam.get_bed_gt(conf["barcode"],conf["ref"], caller=args.caller,platform=args.platform) + stdev_cutoff = args.barcode_stdev if hasattr(args,'barcode_stdev') else None + barcode_assignment = barcode(barcode_mutations,conf["barcode"],args.barcode_snps,stdev_cutoff=stdev_cutoff) return barcode_assignment def vcf_barcoder(args: argparse.Namespace) -> List[BarcodeResult]: @@ -29,7 +30,8 @@ def vcf_barcoder(args: argparse.Namespace) -> List[BarcodeResult]: barcode_mutations = vcf.get_bed_gt(conf["barcode"],conf["ref"]) if not hasattr(args,'barcode_snps'): args.barcode_snps = None - barcode_assignment = barcode(barcode_mutations,conf["barcode"],args.barcode_snps) + stdev_cutoff = args.barcode_stdev if hasattr(args,'barcode_stdev') else None + barcode_assignment = barcode(barcode_mutations,conf["barcode"],args.barcode_snps,stdev_cutoff=stdev_cutoff) return barcode_assignment