Skip to content

Commit

Permalink
pre-2.6.0-counts
Browse files Browse the repository at this point in the history
  • Loading branch information
telatin committed Feb 6, 2022
1 parent 1a397dc commit ee9331d
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 9 deletions.
Binary file modified bin/covtotarget
Binary file not shown.
Binary file modified bin/gff2bed
Binary file not shown.
5 changes: 5 additions & 0 deletions input/someregions.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
seq1 5 112 include_5
seq1 410 532 overlap_2
seq1 800 950 empty1_0
seq2 300 532 shared1_10
seq2 566 769 shared2_10
Binary file added input/unsorted.bam
Binary file not shown.
27 changes: 27 additions & 0 deletions scripts/bedToGtf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env python

import argparse, sys

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Convert bed file to gtf file.")
parser.add_argument("bed", help="Input bed file.")
parser.add_argument("gtf", help="Output gtf file.")
args = parser.parse_args()

if args.gtf:
outfile = open(args.gtf, "w")
else:
outfile = sys.stdout
with open(args.bed) as f:
for line in f:
line = line.strip()
if line:

fields = line.split()

chrom, start, end, name = fields[0], fields[1], fields[2], fields[3]
strand = "+"
#chr2 215593349 215593782 REG1
#seq1 bamtocov exon 5 111 . + . gene_id "include_5"
print(f"{chrom}\tbamtocov\texon\t{start}\t{end}\t.\t{strand}\t.\tgene_id \"{name}\"", file=outfile)

19 changes: 19 additions & 0 deletions scripts/comparecounts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env python3
import sys
inputfile = sys.argv[1]

with open(inputfile) as f:
totaldelta = 0
for line in f:
line = line.strip()
if line:
fields = line.split()
id1, c1, id2, c2 = fields[0], fields[1], fields[2], fields[3]
if id1 == id2:
delta = int(c2) - int(c1)
p = delta / (int(c2)) * 100 if int(c2) > 0 else 0

totaldelta += delta
print(f"{id1}\t{delta}\t{p:.2f}\t{c1},{c2}")

print(f"Total delta: {totaldelta}")
28 changes: 20 additions & 8 deletions src/bamtocounts.nim
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ proc handler() {.noconv.} =
setControlCHook(handler)

var
do_strict = false
do_norm = false
debug = false
verbose = false
Expand Down Expand Up @@ -67,13 +68,20 @@ proc countsToString(c: stranded_counts, stranded: bool): string =
else:
$(c.fwd + c.rev)

proc alignments_count(table: var OrderedTable[string, stranded_counts], bam:Bam, mapq:uint8, eflag:uint16, regions: target_t): float =
proc alignments_count(table: var OrderedTable[string, stranded_counts], bam:Bam, mapq:uint8, eflag:uint16, regions: target_t, strict = false): float =
var total: float = 0
for read in bam:
total += 1
if read.tid notin regions:
continue
if read.mapping_quality == 0 or ( (read.flag and 1796) != 0):
continue

for region in regions[read.tid]:
#if read.start >= region.start and read.stop <= region.stop:
if (read.stop >= region.start and read.stop <= region.stop) or (read.start >= region.start and read.start <= region.stop):

if (read.stop >= region.start and read.stop <= region.stop) or (read.start >= region.start and read.start <= region.stop):
if strict and ( read.start < region.start or read.stop > region.stop ):
continue
table[region.label].inc(read.flag.reverse)
return total / 1000000

Expand Down Expand Up @@ -108,6 +116,7 @@ Options:
-r, --fasta <fasta> FASTA file for use with CRAM files [default: $env_fasta].
-F, --flag <FLAG> Exclude reads with any of the bits in FLAG set [default: 1796]
-Q, --mapq <mapq> Mapping quality threshold [default: 0]
--strict Read must be contained, not just overlap, with feature
--stranded Print strand-specific counts
--coords Also print coordinates of each feature
Expand Down Expand Up @@ -137,6 +146,7 @@ Options:
do_norm = bool(args["--norm-len"])
do_strand = bool(args["--stranded"])
do_coords = bool(args["--coords"])
do_strict = bool(args["--strict"])
debug = bool(args["--debug"])
verbose = bool(args["--verbose"])
gffIdentifier = $args["--id"]
Expand Down Expand Up @@ -183,18 +193,20 @@ Options:
try:
targetTable = gff_to_table($args["<Target>"], gffField, gffSeparator, gffIdentifier)
except Exception as e:
stderr.writeLine("ERROR: Unable to parse GFF file: ", $args["<Target>"])
stderr.writeLine("ERROR: Unable to parse GFF file: ", $args["<Target>"], ": ", e.msg)
quit(1)
else:
try:
targetTable = bed_to_table($args["<Target>"])
except Exception as e:
stderr.writeLine("ERROR: Unable to parse BED file: ", $args["<Target>"])
stderr.writeLine("ERROR: Unable to parse BED file: ", $args["<Target>"], ": ", e.msg)
quit(1)


if debug:
stderr.writeLine("[OK] Target table loaded")

#stderr.writeLine("Target table: ", targetTable)

if len(targetTable) == 0:
stderr.writeLine("ERROR: No target regions found (try changing --id and --type): see an example line below")
Expand All @@ -213,10 +225,10 @@ Options:
for index, chrName in bam.hdr.targets:
#feature_coords = tuple[chrom, starts, stops, name]
if debug:
stderr.writeLine(" > BAM targets: ", chrName, "-", index)
stderr.writeLine(" > BAM targets: ", chrName, " - index:", index)
if index in cookedTarget:
if debug:
stderr.writeLine(" + Coocked targets: ", chrName, "-", index)
stderr.writeLine(" + Coocked targets: ", chrName, " - index:", index)
for interval in cookedTarget[index]:
let
c : feature_coords = (chrom: chrName.name, starts: $interval.start, stops: $interval.stop, name: interval.label, length: int(interval.stop - interval.start))
Expand Down Expand Up @@ -253,7 +265,7 @@ Options:

if debug:
stderr.writeLine("\\/ Target regions: ", len(targetCounts))
let perMillion = targetCounts.alignments_count(bam, uint8(mapq), eflag, cookedTarget)
let perMillion = targetCounts.alignments_count(bam, uint8(mapq), eflag, cookedTarget, do_strict)
if debug:
stderr.writeLine("/\\ Counts done: ", perMillion)

Expand Down
6 changes: 5 additions & 1 deletion src/covutils.nim
Original file line number Diff line number Diff line change
Expand Up @@ -254,16 +254,20 @@ proc gff_line_to_region*(line: string, gffField = "CDS", gffSeparator = ";", gff
# Try splitting on "="
if len(splittedField) == 2:
reg.name = splittedField[1].strip(chars = {'"', '\'', ' '})

break
else:
let resplittedField = gffAnnotPart.split(" ")
if len(resplittedField) == 2:
reg.name = resplittedField[0].strip(chars = {'"', '\'', ' '})
reg.name = resplittedField[1].strip(chars = {'"', '\'', ' '})

break
else:
reg.name = "Error"

break


except Exception as e:
stderr.write_line("[warning] fields 8 is not a string):", cse[8], "\n ", e.msg)
return nil
Expand Down

0 comments on commit ee9331d

Please sign in to comment.