Skip to content

Commit

Permalink
Update patches
Browse files Browse the repository at this point in the history
  • Loading branch information
alberta committed Apr 17, 2024
1 parent fa12142 commit 22e1630
Show file tree
Hide file tree
Showing 6 changed files with 339 additions and 14 deletions.
167 changes: 167 additions & 0 deletions easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Enable_MI_Tag.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
From 0a1706ee3a4ca170e51d426595002b7994e96751 Mon Sep 17 00:00:00 2001
From: Pontus Hojer <pontushojer@gmail.com>
Date: Tue, 8 Jun 2021 10:47:48 +0200
Subject: [PATCH] Enable the use of the MI tag to detect molecules

---
utilities/LinkFragments.py | 105 ++++++++++++++++++++++++++++++++++---
1 file changed, 99 insertions(+), 6 deletions(-)

diff --git a/utilities/LinkFragments.py b/utilities/LinkFragments.py
index e425d8f..ad8279b 100644
--- a/utilities/LinkFragments.py
+++ b/utilities/LinkFragments.py
@@ -13,10 +13,10 @@
import pysam
import argparse
import os
+from math import inf

barcode_tag = 'BX'

-
###############################################################################
###############################################################################
# these functions pulled from "getMolecules" script:
@@ -121,7 +121,93 @@ def get_gemcode_regions(ibam, dist):
max([end for chr, pos, end in gemcodes[barcode]]),
barcode, len(gemcodes[barcode]))

-def get_molecules(bam,ref=None,dist=20000):
+
+def get_gemcode_regions_from_tag(ibam):
+ """
+ Get molecule coordinates from reads that share BX and MI tags.
+
+ Parameters
+ ----------
+ ibam : pysam.AlignmentFile
+ Input 10X bam with set MI and BX tags
+
+ Yields
+ ------
+ region : namedtuple
+ chr, start, end, barcode, readcount
+ """
+ # Create defaultdict for storing molecules
+ # Key is gemcodes + molecule id, value is Molecules instance
+ molecules = defaultdict(Molecule)
+
+ # Set current_chr as reference contig of first read
+ current_chr = None
+
+ # Iterate over reads in bamfile
+ for read in ibam:
+
+ if (read.mapq < min_mapq or read.is_unmapped or read.is_duplicate or read.is_secondary or
+ read.is_qcfail):
+ continue
+
+ assert(read.reference_name is not None)
+ assert(read.reference_start is not None)
+ assert(read.reference_end is not None)
+
+ if not read.has_tag('BX') or not read.has_tag('MI'):
+ continue
+
+ gem = read.get_tag('BX')
+ molecule_id = read.get_tag('MI')
+
+ # Combined gem/barcode and molcule id for storing molecules
+ gem_molecule = (gem, molecule_id)
+
+ # If the read is from a new contig/chromosome, write out all molecules
+ # in memory
+ if read.reference_name != current_chr and current_chr is not None:
+ for _, molecule in molecules.items():
+ yield molecule
+
+ molecules.clear()
+
+ # Add read to molecules
+ molecules[gem_molecule].add(chr=read.reference_name,
+ start=read.reference_start,
+ end=read.reference_end,
+ barcode=gem)
+
+ # Save read contig as current_chr
+ current_chr = read.reference_name
+
+ # Write out all remaining molecules at end of bam
+ for _, molecule in molecules.items():
+ yield molecule
+
+
+class Molecule:
+ def __init__(self):
+ self.chr = None
+ self.start = inf
+ self.end = 0
+ self.barcode = None
+ self.readcount = 0
+
+ def add(self, chr, start, end, barcode):
+ self.set_if_none("chr", chr)
+ self.start = min(self.start, start)
+ self.end = max(self.end, end)
+ self.set_if_none("barcode", barcode)
+ self.readcount += 1
+
+ def set_if_none(self, attr, value):
+ if getattr(self, attr) is None:
+ setattr(self, attr, value)
+ else:
+ assert getattr(self, attr) == value
+
+
+def get_molecules(bam,ref=None,dist=20000, use_tag=False):

#Open outfile
bamf = pysam.AlignmentFile(bam,"rb");
@@ -131,7 +217,12 @@ def get_molecules(bam,ref=None,dist=20000):
ibam = bamf.fetch(reference=ref)

#Get gemcode regions
- for bed in get_gemcode_regions(ibam, dist):
+ if use_tag:
+ regions = get_gemcode_regions_from_tag(ibam)
+ else:
+ regions = get_gemcode_regions(ibam, dist)
+
+ for bed in regions:
#Turn molecule object into string
yield (bed.chr, bed.start, bed.end, bed.barcode)

@@ -290,7 +381,7 @@ def parse_bedfile(input_file):
barcode = el[3]
yield (chrom, start, stop, barcode)

-def link_fragments(hairs_file, vcf_file, bam_file, outfile, dist, single_SNP_frags,maxbq):
+def link_fragments(hairs_file, vcf_file, bam_file, outfile, dist, single_SNP_frags,maxbq=40, use_tag=False):

lines = []

@@ -338,7 +429,7 @@ def link_fragments(hairs_file, vcf_file, bam_file, outfile, dist, single_SNP_fra
dup_snp_cover = 0

# start is 0-indexed start point and stop is 1 past the last residue, 0-indexed
- for chrom,start,stop,barcode in get_molecules(bam_file, curr_chrom, dist=dist):
+ for chrom,start,stop,barcode in get_molecules(bam_file, curr_chrom, dist=dist, use_tag=use_tag):
#print("molecule: {} {} {} {}".format(chrom,start,stop,barcode))
if chrom != curr_chrom:
continue
@@ -469,6 +560,7 @@ def parseargs():
parser.add_argument('-o', '--outfile', nargs='?', type = str, help='output file with linked fragments')
parser.add_argument('-d', '--distance', nargs='?', type = int, help='distance in base pairs that delineates separate 10X molecules, default=20kb',default=20000)
parser.add_argument('-m', '--maxbq', nargs='?', type = int, help='maximum base quality for an allele call, default=40',default=40)
+ parser.add_argument('--use-tag', action="store_true", default=False, help=f'use molecule tag (MI) to separate between molecules')

parser.add_argument('-s', '--single_SNP_frags', action='store_true', help='whether to keep fragments overlapping only one SNP', default=False)

@@ -484,4 +576,5 @@ def parseargs():
# parse input and run function to call alleles
if __name__ == '__main__':
args = parseargs()
- link_fragments(args.fragments,args.VCF,args.bam_file, args.outfile, args.distance, args.single_SNP_frags,args.maxbq)
+ link_fragments(args.fragments,args.VCF,args.bam_file, args.outfile, args.distance, args.single_SNP_frags,args.maxbq,
+ args.use_tag)
23 changes: 23 additions & 0 deletions easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Fix-Makefile00.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
From 96f924e18d70add1841fe5b11c60e8d8d2a872b2 Mon Sep 17 00:00:00 2001
From: snehring <snehring@iastate.edu>
Date: Wed, 27 Apr 2022 11:29:07 -0500
Subject: [PATCH] Makefile changes

---
Makefile | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index c473df9..b0d754d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,7 @@
# HAPCUT2 MAKEFILE
default: all

-CC=gcc -g -O3 -Wall -D_GNU_SOURCE
-CFLAGS=-Wall
+CFLAGS=-Wall -g -O3 -Wall -D_GNU_SOURCE

# DIRECTORIES
B=build
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
From c8fa7a482f8422aa7e3c03bf5a483bf3f485b7f0 Mon Sep 17 00:00:00 2001
From: Vikas Bansal <vikas115@gmail.com>
Date: Mon, 31 Jan 2022 13:46:05 -0800
Subject: [PATCH] - added script install.sh to install htslib (from source) and
hapcut2 automatically

- path to htslib changed to htslib
---
Makefile | 4 ++--
hairs-src/readvariant.c | 1 +
hapcut2-src/hic.c | 6 +++++-
hapcut2-src/optionparser.c | 2 +-
install.sh | 10 ++++++++++
5 files changed, 19 insertions(+), 4 deletions(-)
create mode 100644 install.sh

diff --git a/Makefile b/Makefile
index dd185cd..c473df9 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ CFLAGS=-Wall
B=build
H=hairs-src
X=hapcut2-src
-HTSLIB=/usr/common/src/htslib #path/to/htslib/
+HTSLIB=htslib #/usr/common/src/htslib #path/to/htslib/
T=test
# below is the path to CUnit directory, change if need be
CUNIT=/usr/include/CUnit
@@ -19,7 +19,7 @@ all: $(B)/extractHAIRS $(B)/HAPCUT2

#temporarily removed -O2 flag after -I$(HTSLIB)
$(B)/extractHAIRS: $(B)/bamread.o $(B)/hashtable.o $(B)/readvariant.o $(B)/readfasta.o $(B)/hapfragments.o $(H)/extracthairs.c $(H)/parsebamread.c $(H)/realignbamread.c $(H)/nw.c $(H)/realign_pairHMM.c $(H)/estimate_hmm_params.c | $(B)
- $(CC) $(CFLAGS) $(LDFLAGS) -I$(HTSLIB) -g $(B)/bamread.o $(B)/hapfragments.o $(B)/hashtable.o $(B)/readfasta.o $(B)/readvariant.o -o $(B)/extractHAIRS $(H)/extracthairs.c -pthread -lhts -lm -lz -lcurl -lcrypto -llzma -lbz2
+ $(CC) $(CFLAGS) $(LDFLAGS) -I$(HTSLIB) -g $(B)/bamread.o $(B)/hapfragments.o $(B)/hashtable.o $(B)/readfasta.o $(B)/readvariant.o -o $(B)/extractHAIRS $(H)/extracthairs.c -pthread -lhts -lm -lz -lcurl -llzma -lbz2
#temporarily removed -O2 flag after -I$(HTSLIB)

$(B)/hapfragments.o: $(H)/hapfragments.c $(H)/hapfragments.h $(H)/readvariant.h | $(B)
diff --git a/hairs-src/readvariant.c b/hairs-src/readvariant.c
index 04fe6ba..0a95d29 100644
--- a/hairs-src/readvariant.c
+++ b/hairs-src/readvariant.c
@@ -93,6 +93,7 @@ int parse_variant(VARIANT* variant, char* buffer, int samplecol)
// int altalleles = splitString(variant->AA,',',alist);

int gl = strlen(variant->genotype);
+ //fprintf(stderr,"variant genotype %d %s \n",variant->position,variant->genotype);
// check that the genotype field is diploid
if ((gl >= 3 && (variant->genotype[1] == '/' || variant->genotype[1] == '|')) &&
(gl == 3 || variant->genotype[3] == ':') &&
diff --git a/hapcut2-src/hic.c b/hapcut2-src/hic.c
index fc85142..2036dde 100644
--- a/hapcut2-src/hic.c
+++ b/hapcut2-src/hic.c
@@ -3,6 +3,10 @@
float HIC_EM_THRESHOLD = 0.99;
/*
functions that are specific to processing of Hi-C reads for phasing: estimating h-trans error rates and I/O
+HTRANS_BINSIZE = 5000;
+HTRANS_MAXBINS = 10000; // this value will be overwritten at startup
+HTRANS_READ_LOWBOUND = 500;
+HTRANS_MAX_WINDOW = 4000000; // maximum window size for h-trans estimation
*/

int count_htrans_bins(char* htrans_file) {
@@ -169,7 +173,7 @@ int estimate_htrans_probs(struct fragment* Flist, int fragments, char* HAP, stru
}
}

- // using neighboring bins to calculate mean htrans for each bin
+ // using neighboring bins to calculate mean htrans for each bin if the number of reads in bin is less < 500 (READ_LOWBOUND)
for (i = 0; i < HTRANS_MAXBINS; i++){
adj_MLE_count[i] = MLE_count[i];
adj_MLE_sum[i] = MLE_sum[i];
diff --git a/hapcut2-src/optionparser.c b/hapcut2-src/optionparser.c
index 8308ec3..6674956 100644
--- a/hapcut2-src/optionparser.c
+++ b/hapcut2-src/optionparser.c
@@ -200,7 +200,7 @@ void print_hapcut_options() {
//fprintf(stderr, "--split_threshold, --st <float>: PHRED SCALED threshold for splitting blocks (range 0-100, larger values split more). default: 0\n");
fprintf(stderr, "--call_homozygous, --ch <0/1>: call positions as homozygous if they appear to be false heterozygotes. default: 0\n");
fprintf(stderr, "--discrete_pruning, --dp <0/1>: use discrete heuristic to prune SNPs. default: 0\n");
- fprintf(stderr, "--error_analysis_mode, --ea <0/1>: compute switch confidence scores and print to haplotype file but don't split blocks or prune. default: 0\n");
+ //fprintf(stderr, "--error_analysis_mode, --ea <0/1>: compute switch confidence scores and print to haplotype file but don't split blocks or prune. default: 0\n");

fprintf(stderr, "\nAdvanced Options:\n");
fprintf(stderr, "--new_format, --nf <0/1>: use new Hi-C fragment matrix file format (but don't do h-trans error modeling). default: 0\n");
diff --git a/install.sh b/install.sh
new file mode 100644
index 0000000..629ebad
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,10 @@
+git clone https://github.com/vibansal/HapCUT2.git
+cd hapcut2
+git clone https://github.com/samtools/htslib.git
+cd htslib
+autoreconf -i
+./configure
+make
+cd ..
+export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$(pwd)/htslib"
+make
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
From 65771565f29aa61f1fa7e8319a39c8a65cc19824 Mon Sep 17 00:00:00 2001
From: snehring <snehring@iastate.edu>
Date: Wed, 27 Apr 2022 11:28:35 -0500
Subject: [PATCH] fixing build errors with newer gcc versions

---
hairs-src/hashtable.c | 2 +-
hairs-src/hashtable.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hairs-src/hashtable.c b/hairs-src/hashtable.c
index d221547..b4a1c58 100644
--- a/hairs-src/hashtable.c
+++ b/hairs-src/hashtable.c
@@ -39,7 +39,7 @@ int insert_keyvalue(HASHTABLE* ht, char* key, int slen, int value) {

int getindex(HASHTABLE* ht, char* chrom) {
int hash = hashstring(chrom, ht->htsize);
- keypointer = ht->blist[hash];
+ keyvalue* keypointer = ht->blist[hash];
while (keypointer != NULL) {
if (strcmp(keypointer->key, chrom) == 0) return keypointer->value;
keypointer = keypointer->next;
diff --git a/hairs-src/hashtable.h b/hairs-src/hashtable.h
index 1cb566b..3153da2 100644
--- a/hairs-src/hashtable.h
+++ b/hairs-src/hashtable.h
@@ -13,7 +13,7 @@ typedef struct keyvalue {
struct keyvalue* next;
} keyvalue;

-keyvalue* keypointer;
+extern keyvalue* keypointer;

typedef struct HASHTABLE {
int htsize; // prime number that is also the size of HASHTABLE
24 changes: 10 additions & 14 deletions easybuild/easyconfigs/h/HapCUT2/HapCUT2-v1.3.3-GCC-12.3.0.eb
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,19 @@ toolchain = {'name': 'GCC', 'version': '12.3.0'}
source_urls = ['https://github.com/vibansal/HapCUT2/archive/refs/tags/']
sources = ['%(version)s.tar.gz']
patches = [
'https://github.com/vibansal/HapCUT2/commit/0a1706ee3a4ca170e51d426595002b7994e96751.patch',
'https://github.com/vibansal/HapCUT2/commit/65771565f29aa61f1fa7e8319a39c8a65cc19824.patch',
'https://github.com/vibansal/HapCUT2/commit/c8fa7a482f8422aa7e3c03bf5a483bf3f485b7f0.patch',
'https://github.com/vibansal/HapCUT2/commit/96f924e18d70add1841fe5b11c60e8d8d2a872b2.patch',
'HAPcut-v1.3.3-Fix-Makefile.patch',
'HAPcut-v1.3.3-Enable_MI_Tag.patch',
'HAPcut-v1.3.3-Fix_build_errors.patch',
'HAPcut-v1.3.3-Fix_Add_install_script.patch',
'HAPcut-v1.3.3-Fix-Makefile00.patch',
'HAPcut-v1.3.3-Fix-Makefile01.patch',
]
checksums = [
{'v1.3.3.tar.gz': '95b832e667638aff1de51721e54c906dfe8b46793b1ca8ecf1b6790d4f84aade'},
{'0a1706ee3a4ca170e51d426595002b7994e96751.patch':
'bf40027ae5749f9ed4568373b9c4d39c8cb6dae4d51f39b1bb53433882d0b025'},
{'65771565f29aa61f1fa7e8319a39c8a65cc19824.patch':
'8c6eeaa87e8df94da92d57d00cd1ba07f8a046a903e4186edea829f6a2416100'},
{'c8fa7a482f8422aa7e3c03bf5a483bf3f485b7f0.patch':
'eb274546ebc5c056161a0c58336bf43aea6d1e53b07c927f94fba7b75ee3c58f'},
{'96f924e18d70add1841fe5b11c60e8d8d2a872b2.patch':
'0d3a1698d6f3299f00358aab7d4093ba08882a025aeac9bba9115d78ec5ba7ab'},
{'HAPcut-v1.3.3-Fix-Makefile.patch': '82785f2ea635763e85dafa7338712b6bb5307b9664728a0883858db0b9614e15'},
{'HAPcut-v1.3.3-Enable_MI_Tag.patch': 'bf40027ae5749f9ed4568373b9c4d39c8cb6dae4d51f39b1bb53433882d0b025'},
{'HAPcut-v1.3.3-Fix_build_errors.patch': '8c6eeaa87e8df94da92d57d00cd1ba07f8a046a903e4186edea829f6a2416100'},
{'HAPcut-v1.3.3-Fix_Add_install_script.patch': 'eb274546ebc5c056161a0c58336bf43aea6d1e53b07c927f94fba7b75ee3c58f'},
{'HAPcut-v1.3.3-Fix-Makefile00.patch': '0d3a1698d6f3299f00358aab7d4093ba08882a025aeac9bba9115d78ec5ba7ab'},
{'HAPcut-v1.3.3-Fix-Makefile01.patch': '82785f2ea635763e85dafa7338712b6bb5307b9664728a0883858db0b9614e15'},
]

dependencies = [
Expand Down

0 comments on commit 22e1630

Please sign in to comment.