Update patches

garadar · Apr 17, 2024 · 22e1630 · 22e1630
1 parent fa12142
commit 22e1630
Show file tree

Hide file tree

Showing 6 changed files with 339 additions and 14 deletions.
diff --git a/easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Enable_MI_Tag.patch b/easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Enable_MI_Tag.patch
@@ -0,0 +1,167 @@
+From 0a1706ee3a4ca170e51d426595002b7994e96751 Mon Sep 17 00:00:00 2001
+From: Pontus Hojer <pontushojer@gmail.com>
+Date: Tue, 8 Jun 2021 10:47:48 +0200
+Subject: [PATCH] Enable the use of the MI tag to detect molecules
+
+---
+ utilities/LinkFragments.py | 105 ++++++++++++++++++++++++++++++++++---
+ 1 file changed, 99 insertions(+), 6 deletions(-)
+
+diff --git a/utilities/LinkFragments.py b/utilities/LinkFragments.py
+index e425d8f..ad8279b 100644
+--- a/utilities/LinkFragments.py
++++ b/utilities/LinkFragments.py
+@@ -13,10 +13,10 @@
+ import pysam
+ import argparse
+ import os
++from math import inf
+
+ barcode_tag = 'BX'
+
+-
+ ###############################################################################
+ ###############################################################################
+ # these functions pulled from "getMolecules" script:
+@@ -121,7 +121,93 @@ def get_gemcode_regions(ibam, dist):
+                        max([end for chr, pos, end in gemcodes[barcode]]),
+                        barcode, len(gemcodes[barcode]))
+
+-def get_molecules(bam,ref=None,dist=20000):
++
++def get_gemcode_regions_from_tag(ibam):
++    """
++    Get molecule coordinates from reads that share BX and MI tags.
++
++    Parameters
++    ----------
++    ibam : pysam.AlignmentFile
++        Input 10X bam with set MI and BX tags
++
++    Yields
++    ------
++    region : namedtuple
++        chr, start, end, barcode, readcount
++    """
++    # Create defaultdict for storing molecules
++    # Key is gemcodes + molecule id, value is Molecules instance
++    molecules = defaultdict(Molecule)
++
++    # Set current_chr as reference contig of first read
++    current_chr = None
++
++    # Iterate over reads in bamfile
++    for read in ibam:
++
++        if (read.mapq < min_mapq or read.is_unmapped or read.is_duplicate or read.is_secondary or
++           read.is_qcfail):
++            continue
++
++        assert(read.reference_name is not None)
++        assert(read.reference_start is not None)
++        assert(read.reference_end is not None)
++
++        if not read.has_tag('BX') or not read.has_tag('MI'):
++            continue
++
++        gem = read.get_tag('BX')
++        molecule_id = read.get_tag('MI')
++
++        # Combined gem/barcode and molcule id for storing molecules
++        gem_molecule = (gem, molecule_id)
++
++        # If the read is from a new contig/chromosome, write out all molecules
++        # in memory
++        if read.reference_name != current_chr and current_chr is not None:
++            for _, molecule in molecules.items():
++                yield molecule
++
++            molecules.clear()
++
++        # Add read to molecules
++        molecules[gem_molecule].add(chr=read.reference_name,
++                                    start=read.reference_start,
++                                    end=read.reference_end,
++                                    barcode=gem)
++
++        # Save read contig as current_chr
++        current_chr = read.reference_name
++
++    # Write out all remaining molecules at end of bam
++    for _, molecule in molecules.items():
++        yield molecule
++
++
++class Molecule:
++    def __init__(self):
++        self.chr = None
++        self.start = inf
++        self.end = 0
++        self.barcode = None
++        self.readcount = 0
++
++    def add(self, chr, start, end, barcode):
++        self.set_if_none("chr", chr)
++        self.start = min(self.start, start)
++        self.end = max(self.end, end)
++        self.set_if_none("barcode", barcode)
++        self.readcount += 1
++
++    def set_if_none(self, attr, value):
++        if getattr(self, attr) is None:
++            setattr(self, attr, value)
++        else:
++            assert getattr(self, attr) == value
++
++
++def get_molecules(bam,ref=None,dist=20000, use_tag=False):
+
+     #Open outfile
+     bamf = pysam.AlignmentFile(bam,"rb");
+@@ -131,7 +217,12 @@ def get_molecules(bam,ref=None,dist=20000):
+         ibam = bamf.fetch(reference=ref)
+
+     #Get gemcode regions
+-    for bed in get_gemcode_regions(ibam, dist):
++    if use_tag:
++        regions = get_gemcode_regions_from_tag(ibam)
++    else:
++        regions = get_gemcode_regions(ibam, dist)
++
++    for bed in regions:
+         #Turn molecule object into string
+         yield (bed.chr, bed.start, bed.end, bed.barcode)
+
+@@ -290,7 +381,7 @@ def parse_bedfile(input_file):
+             barcode = el[3]
+             yield (chrom, start, stop, barcode)
+
+-def link_fragments(hairs_file, vcf_file, bam_file, outfile, dist, single_SNP_frags,maxbq):
++def link_fragments(hairs_file, vcf_file, bam_file, outfile, dist, single_SNP_frags,maxbq=40, use_tag=False):
+
+     lines = []
+
+@@ -338,7 +429,7 @@ def link_fragments(hairs_file, vcf_file, bam_file, outfile, dist, single_SNP_fra
+         dup_snp_cover = 0
+
+         # start is 0-indexed start point and stop is 1 past the last residue, 0-indexed
+-        for chrom,start,stop,barcode in get_molecules(bam_file, curr_chrom, dist=dist):
++        for chrom,start,stop,barcode in get_molecules(bam_file, curr_chrom, dist=dist, use_tag=use_tag):
+             #print("molecule: {} {} {} {}".format(chrom,start,stop,barcode))
+             if chrom != curr_chrom:
+                 continue
+@@ -469,6 +560,7 @@ def parseargs():
+     parser.add_argument('-o', '--outfile', nargs='?', type = str, help='output file with linked fragments')
+     parser.add_argument('-d', '--distance', nargs='?', type = int, help='distance in base pairs that delineates separate 10X molecules, default=20kb',default=20000)
+     parser.add_argument('-m', '--maxbq', nargs='?', type = int, help='maximum base quality for an allele call, default=40',default=40)
++    parser.add_argument('--use-tag', action="store_true", default=False, help=f'use molecule tag (MI) to separate between molecules')
+
+     parser.add_argument('-s', '--single_SNP_frags', action='store_true', help='whether to keep fragments overlapping only one SNP', default=False)
+
+@@ -484,4 +576,5 @@ def parseargs():
+ # parse input and run function to call alleles
+ if __name__ == '__main__':
+     args = parseargs()
+-    link_fragments(args.fragments,args.VCF,args.bam_file, args.outfile, args.distance, args.single_SNP_frags,args.maxbq)
++    link_fragments(args.fragments,args.VCF,args.bam_file, args.outfile, args.distance, args.single_SNP_frags,args.maxbq, 
++                   args.use_tag)
diff --git a/easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Fix-Makefile00.patch b/easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Fix-Makefile00.patch
@@ -0,0 +1,23 @@
+From 96f924e18d70add1841fe5b11c60e8d8d2a872b2 Mon Sep 17 00:00:00 2001
+From: snehring <snehring@iastate.edu>
+Date: Wed, 27 Apr 2022 11:29:07 -0500
+Subject: [PATCH] Makefile changes
+
+---
+ Makefile | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/Makefile b/Makefile
+index c473df9..b0d754d 100644
+--- a/Makefile
++++ b/Makefile
+@@ -2,8 +2,7 @@
+ # HAPCUT2 MAKEFILE
+ default: all
+
+-CC=gcc -g -O3 -Wall -D_GNU_SOURCE
+-CFLAGS=-Wall
++CFLAGS=-Wall -g -O3 -Wall -D_GNU_SOURCE
+
+ # DIRECTORIES
+ B=build
diff --git a/.../HapCUT2/HAPcut-v1.3.3-Fix-Makefile.patch → ...apCUT2/HAPcut-v1.3.3-Fix-Makefile01.patch b/.../HapCUT2/HAPcut-v1.3.3-Fix-Makefile.patch → ...apCUT2/HAPcut-v1.3.3-Fix-Makefile01.patch
diff --git a/easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Fix_Add_install_script.patch b/easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Fix_Add_install_script.patch
@@ -0,0 +1,103 @@
+From c8fa7a482f8422aa7e3c03bf5a483bf3f485b7f0 Mon Sep 17 00:00:00 2001
+From: Vikas Bansal <vikas115@gmail.com>
+Date: Mon, 31 Jan 2022 13:46:05 -0800
+Subject: [PATCH] - added script install.sh to install htslib (from source) and
+ hapcut2 automatically
+
+- path to htslib changed to htslib
+---
+ Makefile                   |  4 ++--
+ hairs-src/readvariant.c    |  1 +
+ hapcut2-src/hic.c          |  6 +++++-
+ hapcut2-src/optionparser.c |  2 +-
+ install.sh                 | 10 ++++++++++
+ 5 files changed, 19 insertions(+), 4 deletions(-)
+ create mode 100644 install.sh
+
+diff --git a/Makefile b/Makefile
+index dd185cd..c473df9 100644
+--- a/Makefile
++++ b/Makefile
+@@ -9,7 +9,7 @@ CFLAGS=-Wall
+ B=build
+ H=hairs-src
+ X=hapcut2-src
+-HTSLIB=/usr/common/src/htslib #path/to/htslib/
++HTSLIB=htslib #/usr/common/src/htslib #path/to/htslib/
+ T=test
+ # below is the path to CUnit directory, change if need be
+ CUNIT=/usr/include/CUnit
+@@ -19,7 +19,7 @@ all: $(B)/extractHAIRS $(B)/HAPCUT2
+
+ #temporarily removed -O2 flag after -I$(HTSLIB)
+ $(B)/extractHAIRS: $(B)/bamread.o $(B)/hashtable.o $(B)/readvariant.o $(B)/readfasta.o $(B)/hapfragments.o $(H)/extracthairs.c $(H)/parsebamread.c $(H)/realignbamread.c $(H)/nw.c $(H)/realign_pairHMM.c $(H)/estimate_hmm_params.c | $(B)
+-	$(CC) $(CFLAGS) $(LDFLAGS) -I$(HTSLIB) -g $(B)/bamread.o $(B)/hapfragments.o $(B)/hashtable.o $(B)/readfasta.o $(B)/readvariant.o -o $(B)/extractHAIRS $(H)/extracthairs.c -pthread -lhts -lm -lz -lcurl -lcrypto -llzma -lbz2
++	$(CC) $(CFLAGS) $(LDFLAGS) -I$(HTSLIB) -g $(B)/bamread.o $(B)/hapfragments.o $(B)/hashtable.o $(B)/readfasta.o $(B)/readvariant.o -o $(B)/extractHAIRS $(H)/extracthairs.c -pthread -lhts -lm -lz -lcurl -llzma -lbz2
+ #temporarily removed -O2 flag after -I$(HTSLIB)
+
+ $(B)/hapfragments.o: $(H)/hapfragments.c $(H)/hapfragments.h $(H)/readvariant.h | $(B)
+diff --git a/hairs-src/readvariant.c b/hairs-src/readvariant.c
+index 04fe6ba..0a95d29 100644
+--- a/hairs-src/readvariant.c
++++ b/hairs-src/readvariant.c
+@@ -93,6 +93,7 @@ int parse_variant(VARIANT* variant, char* buffer, int samplecol)
+  //       int altalleles = splitString(variant->AA,',',alist);
+
+ 	int gl = strlen(variant->genotype);
++        //fprintf(stderr,"variant genotype %d %s \n",variant->position,variant->genotype);
+ 	// check that the genotype field is diploid
+ 	if ((gl >= 3 && (variant->genotype[1] == '/' || variant->genotype[1] == '|')) &&
+ 			(gl == 3 || variant->genotype[3] == ':') &&
+diff --git a/hapcut2-src/hic.c b/hapcut2-src/hic.c
+index fc85142..2036dde 100644
+--- a/hapcut2-src/hic.c
++++ b/hapcut2-src/hic.c
+@@ -3,6 +3,10 @@
+ float HIC_EM_THRESHOLD = 0.99; 
+ /*
+ functions that are specific to processing of Hi-C reads for phasing: estimating h-trans error rates and I/O
++HTRANS_BINSIZE = 5000;
++HTRANS_MAXBINS = 10000; // this value will be overwritten at startup
++HTRANS_READ_LOWBOUND = 500;
++HTRANS_MAX_WINDOW = 4000000; // maximum window size for h-trans estimation
+ */
+
+ int count_htrans_bins(char* htrans_file) {
+@@ -169,7 +173,7 @@ int estimate_htrans_probs(struct fragment* Flist, int fragments, char* HAP, stru
+         }
+     }
+
+-	// using neighboring bins to calculate mean htrans for each bin
++    // using neighboring bins to calculate mean htrans for each bin if the number of reads in bin is less < 500 (READ_LOWBOUND)
+     for (i = 0; i < HTRANS_MAXBINS; i++){
+         adj_MLE_count[i] = MLE_count[i];
+         adj_MLE_sum[i] = MLE_sum[i];
+diff --git a/hapcut2-src/optionparser.c b/hapcut2-src/optionparser.c
+index 8308ec3..6674956 100644
+--- a/hapcut2-src/optionparser.c
++++ b/hapcut2-src/optionparser.c
+@@ -200,7 +200,7 @@ void print_hapcut_options() {
+     //fprintf(stderr, "--split_threshold, --st <float>:    PHRED SCALED threshold for splitting blocks (range 0-100, larger values split more). default: 0\n");
+     fprintf(stderr, "--call_homozygous, --ch <0/1>:      call positions as homozygous if they appear to be false heterozygotes. default: 0\n");
+     fprintf(stderr, "--discrete_pruning, --dp <0/1>:     use discrete heuristic to prune SNPs. default: 0\n");
+-    fprintf(stderr, "--error_analysis_mode, --ea <0/1>:  compute switch confidence scores and print to haplotype file but don't split blocks or prune. default: 0\n");
++    //fprintf(stderr, "--error_analysis_mode, --ea <0/1>:  compute switch confidence scores and print to haplotype file but don't split blocks or prune. default: 0\n");
+
+     fprintf(stderr, "\nAdvanced Options:\n");
+     fprintf(stderr, "--new_format, --nf <0/1>:           use new Hi-C fragment matrix file format (but don't do h-trans error modeling). default: 0\n");
+diff --git a/install.sh b/install.sh
+new file mode 100644
+index 0000000..629ebad
+--- /dev/null
++++ b/install.sh
+@@ -0,0 +1,10 @@
++git clone https://github.com/vibansal/HapCUT2.git
++cd hapcut2
++git clone https://github.com/samtools/htslib.git
++cd htslib
++autoreconf -i
++./configure
++make
++cd ..
++export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$(pwd)/htslib"
++make
diff --git a/easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Fix_build_errors.patch b/easybuild/easyconfigs/h/HapCUT2/HAPcut-v1.3.3-Fix_build_errors.patch
@@ -0,0 +1,36 @@
+From 65771565f29aa61f1fa7e8319a39c8a65cc19824 Mon Sep 17 00:00:00 2001
+From: snehring <snehring@iastate.edu>
+Date: Wed, 27 Apr 2022 11:28:35 -0500
+Subject: [PATCH] fixing build errors with newer gcc versions
+
+---
+ hairs-src/hashtable.c | 2 +-
+ hairs-src/hashtable.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/hairs-src/hashtable.c b/hairs-src/hashtable.c
+index d221547..b4a1c58 100644
+--- a/hairs-src/hashtable.c
++++ b/hairs-src/hashtable.c
+@@ -39,7 +39,7 @@ int insert_keyvalue(HASHTABLE* ht, char* key, int slen, int value) {
+
+ int getindex(HASHTABLE* ht, char* chrom) {
+     int hash = hashstring(chrom, ht->htsize);
+-    keypointer = ht->blist[hash];
++    keyvalue* keypointer = ht->blist[hash];
+     while (keypointer != NULL) {
+         if (strcmp(keypointer->key, chrom) == 0) return keypointer->value;
+         keypointer = keypointer->next;
+diff --git a/hairs-src/hashtable.h b/hairs-src/hashtable.h
+index 1cb566b..3153da2 100644
+--- a/hairs-src/hashtable.h
++++ b/hairs-src/hashtable.h
+@@ -13,7 +13,7 @@ typedef struct keyvalue {
+     struct keyvalue* next;
+ } keyvalue;
+
+-keyvalue* keypointer;
++extern keyvalue* keypointer;
+
+ typedef struct HASHTABLE {
+     int htsize; // prime number that is also the size of HASHTABLE
diff --git a/easybuild/easyconfigs/h/HapCUT2/HapCUT2-v1.3.3-GCC-12.3.0.eb b/easybuild/easyconfigs/h/HapCUT2/HapCUT2-v1.3.3-GCC-12.3.0.eb
@@ -25,23 +25,19 @@ toolchain = {'name': 'GCC', 'version': '12.3.0'}
 source_urls = ['https://github.com/vibansal/HapCUT2/archive/refs/tags/']
 sources = ['%(version)s.tar.gz']
 patches = [
-    'https://github.com/vibansal/HapCUT2/commit/0a1706ee3a4ca170e51d426595002b7994e96751.patch',
-    'https://github.com/vibansal/HapCUT2/commit/65771565f29aa61f1fa7e8319a39c8a65cc19824.patch',
-    'https://github.com/vibansal/HapCUT2/commit/c8fa7a482f8422aa7e3c03bf5a483bf3f485b7f0.patch',
-    'https://github.com/vibansal/HapCUT2/commit/96f924e18d70add1841fe5b11c60e8d8d2a872b2.patch',
-    'HAPcut-v1.3.3-Fix-Makefile.patch',
+    'HAPcut-v1.3.3-Enable_MI_Tag.patch',
+    'HAPcut-v1.3.3-Fix_build_errors.patch',
+    'HAPcut-v1.3.3-Fix_Add_install_script.patch',
+    'HAPcut-v1.3.3-Fix-Makefile00.patch',
+    'HAPcut-v1.3.3-Fix-Makefile01.patch',
 ]
 checksums = [
     {'v1.3.3.tar.gz': '95b832e667638aff1de51721e54c906dfe8b46793b1ca8ecf1b6790d4f84aade'},
-    {'0a1706ee3a4ca170e51d426595002b7994e96751.patch':
-     'bf40027ae5749f9ed4568373b9c4d39c8cb6dae4d51f39b1bb53433882d0b025'},
-    {'65771565f29aa61f1fa7e8319a39c8a65cc19824.patch':
-     '8c6eeaa87e8df94da92d57d00cd1ba07f8a046a903e4186edea829f6a2416100'},
-    {'c8fa7a482f8422aa7e3c03bf5a483bf3f485b7f0.patch':
-     'eb274546ebc5c056161a0c58336bf43aea6d1e53b07c927f94fba7b75ee3c58f'},
-    {'96f924e18d70add1841fe5b11c60e8d8d2a872b2.patch':
-     '0d3a1698d6f3299f00358aab7d4093ba08882a025aeac9bba9115d78ec5ba7ab'},
-    {'HAPcut-v1.3.3-Fix-Makefile.patch': '82785f2ea635763e85dafa7338712b6bb5307b9664728a0883858db0b9614e15'},
+    {'HAPcut-v1.3.3-Enable_MI_Tag.patch': 'bf40027ae5749f9ed4568373b9c4d39c8cb6dae4d51f39b1bb53433882d0b025'},
+    {'HAPcut-v1.3.3-Fix_build_errors.patch': '8c6eeaa87e8df94da92d57d00cd1ba07f8a046a903e4186edea829f6a2416100'},
+    {'HAPcut-v1.3.3-Fix_Add_install_script.patch': 'eb274546ebc5c056161a0c58336bf43aea6d1e53b07c927f94fba7b75ee3c58f'},
+    {'HAPcut-v1.3.3-Fix-Makefile00.patch': '0d3a1698d6f3299f00358aab7d4093ba08882a025aeac9bba9115d78ec5ba7ab'},
+    {'HAPcut-v1.3.3-Fix-Makefile01.patch': '82785f2ea635763e85dafa7338712b6bb5307b9664728a0883858db0b9614e15'},
 ]
 
 dependencies = [