9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,15 @@
# Changelog
All new changes are documented here.

## [v1.1.5]
### Added
- Mutation profile of the individual variants composing the SDMs (first and second changes)
- Allele frequencies in the different populations considered

## [v1.1.4]
### Added
- Process to compute the binned allele frequencies for the derived alleles in each group

## [v1.1.3]
### Fixed
- `bcftools sort` failing in some single jobs in distributed systems
50 changes: 50 additions & 0 deletions bin/FreqBinner
@@ -0,0 +1,50 @@
#!/usr/bin/env python
import sys, gzip
import numpy as np

# Variable-width frequency bin edges: finer resolution at low frequencies
itv1 = np.round(
    np.concatenate((
        np.arange(0.0, 0.001, 0.0001),
        np.arange(0.001, 0.01, 0.001),
        np.arange(0.01, 0.1, 0.01),
        np.arange(0.1, 1, 0.1) )),
    decimals=5
)
itv2 = np.concatenate((itv1[1:], np.array([1.])))

if sys.argv[1] == '/dev/stdin':
    opn = sys.stdin
elif '.gz' in sys.argv[1]:
    opn = gzip.open(sys.argv[1])
else:
    opn = open(sys.argv[1])

for n, line in enumerate(opn):
    # gzip.open yields bytes; decode to str when needed
    if isinstance(line, (bytes, bytearray)):
        line = line.decode()
    if n == 0:
        # Header: column 5 holds the change type, frequency columns start at index 6
        line = line.strip().split('\t')
        vals = { i: {'all': np.zeros(len(itv1), dtype='uint64')} for i in line[6:] }
        cols = line[6:]
        continue
    line = line.strip().split('\t')
    values = map(float, line[6:])
    for (col, val) in zip(cols, values):
        if val > 0:
            # Find the bin (itv1, itv2] that contains this frequency
            binning = np.where(np.logical_and(val <= itv2, val > itv1))[0][0]
            vals[col]['all'][binning] += 1
            if line[5] in vals[col].keys():
                vals[col][line[5]][binning] += 1
            else:
                vals[col][line[5]] = np.zeros(len(itv1), dtype='uint64')
                vals[col][line[5]][binning] += 1

print('COL\tbinI\tbinE\tChange\ttotChanges\tnChange')
for col in vals:
    allvals = vals[col]['all']
    changes = [k for k in vals[col].keys() if k != 'all']
    for change in changes:
        for n in range(0, len(itv1)):
            print('{}\t{}\t{}\t{}\t{}\t{}'.format(col, itv1[n], itv2[n], change, allvals[n], vals[col][change][n]))
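For reference, a minimal sketch (separate from the patch) of how the variable-width bin edges built in FreqBinner partition (0, 1] and how a single derived-allele frequency lands in a bin; the example frequency 0.042 is an arbitrary illustration.

import numpy as np

# Same edge construction as in bin/FreqBinner above
itv1 = np.round(
    np.concatenate((
        np.arange(0.0, 0.001, 0.0001),   # steps of 0.0001 below 0.001
        np.arange(0.001, 0.01, 0.001),   # steps of 0.001 below 0.01
        np.arange(0.01, 0.1, 0.01),      # steps of 0.01 below 0.1
        np.arange(0.1, 1, 0.1))),        # steps of 0.1 up to 1
    decimals=5,
)
itv2 = np.concatenate((itv1[1:], np.array([1.])))

val = 0.042  # arbitrary example frequency
binning = np.where(np.logical_and(val <= itv2, val > itv1))[0][0]
print(itv1[binning], itv2[binning])  # -> 0.04 0.05, i.e. the bin (0.04, 0.05]
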
11 changes: 11 additions & 0 deletions bin/getCounts3
@@ -19,6 +19,17 @@ load(inRdata)
dat<-dat[which(dat$SameCodon == 1), ]
uniqued<-dat[!duplicated(dat[c("Id1","Id2","Cons1","Cons2")]),]

# Save uniqued positions for both pos1 and pos2
pos1 <- dat %>%
    select(Chr, Pos1) %>%
    unique()
write_delim(pos1, paste("uniquePos1.", popu, ".tsv", sep=""), delim="\t", col_names=FALSE)
pos2 <- dat %>%
    select(Chr, Pos2) %>%
    unique()
write_delim(pos2, paste("uniquePos2.", popu, ".tsv", sep=""), delim="\t", col_names=FALSE)

# Continue with the analyses
indCounts<-vector()
indCounts_uniqued<-vector()
doubleCounts<-vector()
111 changes: 111 additions & 0 deletions bin/maf2snp.py
@@ -0,0 +1,111 @@
#!/usr/bin/env python
import argparse
from itertools import zip_longest


def parse_args():
    # Argument definition
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-m",
        "--maf",
        metavar="maffile.maf",
        type=str,
        help="Input alignments",
        required=True,
    )
    parser.add_argument(
        "-o",
        "--out",
        metavar="prefix",
        type=str,
        help="Output file prefix.",
        required=False,
        default="snps",
    )
    parser.add_argument(
        "-d",
        "--delim",
        metavar="char",
        type=str,
        help="Delimiter.",
        required=False,
        default=",",
    )
    parser.add_argument(
        "--genomes",
        type=str,
        help="Genomes to consider.",
        nargs="+",
        required=True,
    )
    return parser.parse_args()


def reader(maf):
    """Yield the 's' lines of each alignment block in a MAF file."""
    lines = []
    infile = open(maf)
    for line in infile:
        if line.startswith("#"):
            continue
        elif line[0] == "a":
            continue
        elif line[0] == "s":
            lines.append(line)
            continue
        elif line.strip() == "" and len(lines) != 0:
            yield lines
            lines = []
        else:
            lines = []
    if len(lines) > 0:
        yield lines


SFX = {
    ",": "csv",
    ";": "csv",
    "\t": "tsv",
    " ": "txt",
}


def get_matching_prefix(string, prefixes):
    for prefix in prefixes:
        if string.startswith(prefix):
            return prefix
    raise Exception(f'Prefix not found for: {string}')


def main():
    # Get arguments
    args = parse_args()
    # Define unique list of input genomes
    genomes = set(args.genomes)

    # Prepare output
    if args.out == '-' or args.out == '/dev/stdout':
        ofname = '/dev/stdout'
    else:
        ofname = f"{args.out}.{SFX.get(args.delim, 'txt')}"
    outfile = open(ofname, "w")

    # Process the data
    outfile.write("CHROM\tPOS\t{}\n".format("\t".join(args.genomes)))
    for aln in reader(args.maf):
        # MAF 's' line fields: s, src, start, size, strand, srcSize, text
        _, chrxspp, bpi, alnlen, _, _, _ = aln[0].split()
        try:
            _, chromosome = chrxspp.split(".", 1)
        except ValueError:
            raise Exception(f"Impossible to split: {chrxspp}")
        splits = [a.split() for a in aln]
        sequences = {get_matching_prefix(a[1], args.genomes): a[-1] for a in splits}
        # Genomes absent from this block get an all-gap sequence
        for genome in genomes.difference(set(sequences.keys())):
            sequences[genome] = "-" * int(alnlen)
        zipped = list(zip_longest(*[sequences[g] for g in args.genomes]))
        for n, bp in enumerate(range(int(bpi), int(bpi) + int(alnlen))):
            outfile.write("{}\t{}\t{}\n".format(chromosome, bp, "\t".join(zipped[n])))


if __name__ == "__main__":
    main()
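For reference, a small self-contained sketch (separate from the patch) of the MAF 's'-line layout that maf2snp.py expects and of the per-position rows it derives from one alignment block; the genome names hg38 and panTro6 are hypothetical placeholders.

# Toy alignment block made of MAF 's' lines: s, src, start, size, strand, srcSize, text
toy_block = [
    "s hg38.chr1    10 4 + 248956422 ACGT\n",
    "s panTro6.chr1 22 4 + 224244399 ACGA\n",
]

# maf2snp.py keeps the part after the first '.' as the chromosome name
_, chrxspp, bpi, alnlen, _, _, _ = toy_block[0].split()
_, chromosome = chrxspp.split(".", 1)

# One CHROM/POS row per alignment column, one base per genome
# (the real script matches genomes with get_matching_prefix() against --genomes)
seqs = [line.split()[-1] for line in toy_block]
for offset, column in enumerate(zip(*seqs)):
    print(chromosome, int(bpi) + offset, "\t".join(column), sep="\t")
# chr1    10      A       A
# chr1    11      C       C
# chr1    12      G       G
# chr1    13      T       A
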
123 changes: 123 additions & 0 deletions bin/sfs2allele.py
@@ -0,0 +1,123 @@
#!/usr/bin/env python
import argparse
import sys

import numpy as np


def parse_args():
    # Argument definition
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--sfs",
        metavar="TXT",
        type=str,
        help="SFS probabilities",
        required=True,
    )
    parser.add_argument(
        "-i",
        "--input",
        metavar="TXT",
        type=str,
        help="Input file for EST-SFS",
        required=True,
    )
    parser.add_argument(
        "-S",
        "--sites",
        metavar="TXT",
        type=str,
        help="Site positions for the SFS input",
        required=False,
    )
    parser.add_argument(
        "-o",
        "--out",
        metavar="prefix",
        type=str,
        help="Output ancestral allele files.",
        required=False,
        default="-",
    )
    return parser.parse_args()


NTS = ['A', 'C', 'G', 'T']


def get_ancestral(maj_p, maj_a, nodes, counts):
    """Define the ancestral allele."""
    # If the major allele is more likely than the others, return it
    if maj_p > 0.5:
        return maj_a
    else:
        alleles = np.array(NTS)
        counts = np.array([int(c) for c in counts])
        # Minor alleles: observed (count > 0) but not the most frequent
        min_allele_idx = (counts != np.max(counts)) & (counts != 0)
        min_alleles = set(alleles[min_allele_idx])
        isecs = set(nodes).intersection(min_alleles)
        # If only one nucleotide is possible, return it
        if len(isecs) == 1:
            return next(iter(isecs))
        # Otherwise, return the oldest node in the tree
        else:
            return nodes[-1]


def main():
    # Get arguments
    args = parse_args()

    # Get major allele for each site
    majors = {}
    with open(args.input) as inputfile:
        for n, line in enumerate(inputfile):
            allele_cnts = [int(i) for i in line.strip().split()[0].split(',')]
            majors[str(n+1)] = {
                'major': NTS[np.argmax(allele_cnts)],
                'vector': line.strip().split()[0],
                'chrom': None,
                'pos': None
            }

    # Add site mapping
    if args.sites:
        with open(args.sites) as sitesfile:
            for n, line in enumerate(sitesfile):
                chrom, pos = line.strip().split()
                majors[str(n+1)]['chrom'] = chrom
                majors[str(n+1)]['pos'] = pos

    # Define output
    if args.out == '-' or args.out == '/dev/stdout':
        out = sys.stdout
    else:
        out = open(args.out, 'w')

    # Define the ancestral allele based on the probabilities
    nts_combinations = [NT1 + NT2 for NT1 in NTS for NT2 in NTS]
    with open(args.sfs) as sfs:
        for line in sfs:
            if line[0] == '0':
                continue
            site_n, _, maj_prob, probs = line.strip().split(' ', 3)
            maj_prob = float(maj_prob)
            outgr_probs = tuple(map(float, probs.split(' ')))
            max_og_idx = np.argmax(outgr_probs)
            maj_allele = majors[site_n]['major']
            vect = majors[site_n]['vector']
            nodes = nts_combinations[max_og_idx]
            ancestral = get_ancestral(maj_prob, maj_allele, nodes, vect.split(','))
            if args.sites:
                chrom = majors[site_n]['chrom']
                pos = majors[site_n]['pos']
                out.write(f"{site_n}\t{chrom}\t{pos}\t{maj_prob}\t{outgr_probs[max_og_idx]}\t{maj_allele}\t{vect}\t{nodes}\t{ancestral}\n")
            else:
                out.write(f"{site_n}\t{maj_prob}\t{outgr_probs[max_og_idx]}\t{maj_allele}\t{vect}\t{nodes}\t{ancestral}\n")


if __name__ == "__main__":
    main()
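For reference, a small sketch (separate from the patch) of how get_ancestral() behaves on toy inputs; it assumes bin/ is on PYTHONPATH so that sfs2allele can be imported as a module, and the allele counts and node strings are made up for illustration.

from sfs2allele import get_ancestral  # assumes bin/ is importable

# Major allele clearly supported (prob > 0.5): returned as ancestral
print(get_ancestral(0.9, 'A', 'CT', ['5', '1', '0', '0']))  # -> A

# Major allele uncertain: the single minor allele also seen in the outgroup nodes wins
print(get_ancestral(0.3, 'A', 'TA', ['2', '2', '0', '1']))  # -> T

# No unique candidate among the minor alleles: fall back to the oldest node (last character)
print(get_ancestral(0.3, 'A', 'GG', ['2', '2', '0', '1']))  # -> G
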