Skip to content

Commit

Permalink
chore: migrate embedded script
Browse files Browse the repository at this point in the history
  • Loading branch information
adthrasher committed Jan 3, 2025
1 parent 42fe071 commit f553ae4
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 29 deletions.
3 changes: 2 additions & 1 deletion docker/bedtools/2.31.1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Stage that supplies the prebuilt bedtools binaries; only referenced via COPY --from below.
FROM quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_2 AS bedtools
# NOTE(review): this FROM opens an unnamed stage that nothing copies from; it looks like the
# pre-upgrade base line shown by the diff view. The FROM below is the effective base image.
FROM python:3.9.19
# Final image: a Python base is needed to run filter_hic.py.
FROM python:3.13.0

# Pull in everything the bedtools image ships under /usr/local/bin.
COPY --from=bedtools /usr/local/bin/ /usr/local/bin/
# "scripts" is not a stage defined in this file, so it presumably has to be provided at build
# time as a named build context (e.g. `--build-context scripts=<path>`) - TODO confirm.
# 755 instead of 777: the script only needs to be readable/executable, never world-writable.
COPY --from=scripts --chmod=755 hic/filter_hic.py /usr/local/bin/filter_hic.py

ENTRYPOINT [ "bash" ]
45 changes: 45 additions & 0 deletions scripts/hic/filter_hic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

from collections import defaultdict
import argparse

def get_args(argv=None):
    """Parse the command-line options of the Hi-C pair filter.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse falls back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the string attributes ``prefix``,
        ``all_valid_pairs`` and ``filter_pairs``.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            an unknown option is given.
    """
    parser = argparse.ArgumentParser(description="Filter Hi-C data.")
    # Every option is required: the script opens both input paths and builds
    # the output names from the prefix, so a missing value would otherwise
    # only surface later as a TypeError on None.
    # Prefix abbreviation (argparse default) is left enabled on purpose, so an
    # unambiguous shortened flag such as --filter_pair keeps working.
    parser.add_argument(
        "--prefix", type=str, required=True, help="Prefix for output file.")
    parser.add_argument(
        "--all_valid_pairs", type=str, required=True,
        help="All valid pairs file.")
    parser.add_argument(
        "--filter_pairs", type=str, required=True, help="Filter pairs file.")

    return parser.parse_args(argv)

if __name__ == "__main__":
    # Split the all-valid-pairs file into two outputs: lines whose ID (first
    # tab-separated column) appears in the filter file go to
    # "<prefix>.allValidPairs.removed", every other line goes to
    # "<prefix>.allValidPairs.filtered". Lines are written unchanged.
    args = get_args()

    # Collect the IDs to remove. A set is sufficient because only membership
    # is ever tested; the context manager closes the file once it is read.
    with open(args.filter_pairs) as filter_handle:
        blackID = {line.strip().split("\t")[0] for line in filter_handle}

    outfile1 = args.prefix + ".allValidPairs.filtered"
    outfile2 = args.prefix + ".allValidPairs.removed"

    # Open the input before the outputs (same order as before) and let the
    # with-statement flush and close all three handles, even on error.
    with open(args.all_valid_pairs) as pairs_in, \
            open(outfile1, "w") as kept_out, \
            open(outfile2, "w") as removed_out:
        for line in pairs_in:
            pair_id = line.strip().split("\t")[0]
            if pair_id in blackID:
                removed_out.write(line)
            else:
                kept_out.write(line)
32 changes: 4 additions & 28 deletions tools/hilow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -311,34 +311,10 @@ task filter {

cat <(cut -f 4 left.bed) <(cut -f 4 right.bed)|sort -u > filter.pair

python <<CODE
from collections import defaultdict
f=open("filter.pair")
blackID=defaultdict(int)
while True:
line=f.readline()
if not line:
break
cols=line.strip().split("\t")
ID=cols[0]
blackID[ID]=1
f2=open("~{all_valid_pairs}")
outfile1="~{prefix}.allValidPairs.filtered"
outfile2="~{prefix}.allValidPairs.removed"
of1=open(outfile1,'w')
of2=open(outfile2,'w')
while True:
line=f2.readline()
if not line:
break
cols=line.strip().split("\t")
id=cols[0]
if id in blackID:
of2.write(line)
else:
of1.write(line)
CODE
# Use the exact option name declared by filter_hic.py (--filter_pairs) rather than
# relying on argparse prefix abbreviation to resolve --filter_pair.
python /usr/local/bin/filter_hic.py \
    --all_valid_pairs ~{all_valid_pairs} \
    --filter_pairs filter.pair \
    --prefix ~{prefix}

all=$(wc -l ~{all_valid_pairs} |cut -d " " -f 1)
filtered=$(wc -l ~{prefix}.allValidPairs.removed | cut -d " " -f 1)
Expand Down

0 comments on commit f553ae4

Please sign in to comment.