Skip to content

Commit

Permalink
implemented Tajima's D
Browse files Browse the repository at this point in the history
  • Loading branch information
dhuson committed Mar 16, 2024
1 parent 376d86f commit dc53782
Showing 1 changed file with 143 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,38 +20,178 @@
package splitstree6.algorithms.characters.characters2report;


import javafx.beans.property.BooleanProperty;
import javafx.beans.property.SimpleBooleanProperty;
import jloda.fx.util.ProgramProperties;
import jloda.util.CanceledException;
import jloda.util.progress.ProgressListener;
import splitstree6.data.CharactersBlock;
import splitstree6.data.TaxaBlock;
import splitstree6.data.parts.Taxon;

import java.util.BitSet;
import java.util.Collection;
import java.util.List;

/**
* Estimates Tajima's D
* Daniel Huson and David Bryant, 2024
* Daniel Huson, 2024
*/
public class TajimaD extends AnalyzeCharactersBase {

private final BooleanProperty optionExcludeGapSites = new SimpleBooleanProperty(this, "optionExcludeGapSites");

{
ProgramProperties.track(optionExcludeGapSites, true);
}

@Override
public List<String> listOptions() {
return List.of(optionExcludeGapSites.getName());
}

@Override
public String getToolTip(String optionName) {
if (optionExcludeGapSites.getName().endsWith(optionName))
return "Exclude gapped sites from calculation.";
else
return super.getToolTip(optionName);
}

@Override
public String getCitation() {
return "Tajima 1989;F. Tajima,Statistical method for testing the neutral mutation hypothesis by DNA polymorphism. Genetics. 123(3):585–95, 1989";
}

@Override
public String getShortDescription() {
return "Performs a statistical test for detecting the presence of recombination.";
return "Performs Tajima's D test to determine whether a DNA sequence is evolving neutrally.";
}

@Override
String runAnalysis(ProgressListener progress, TaxaBlock taxaBlock, CharactersBlock charactersBlock, Collection<Taxon> selectedTaxa) throws CanceledException {
return "not implemented";
char[][] alignment = charactersBlock.getMatrix();
var n = alignment.length;
var length = alignment[0].length;

if (optionExcludeGapSites.get()) {
var gappedSites = new BitSet();
for (char[] chars : alignment) {
for (var j = 0; j < chars.length; j++) {
if (chars[j] == charactersBlock.getGapCharacter()
|| chars[j] == charactersBlock.getMissingCharacter())
gappedSites.set(j);
}
}
if (gappedSites.cardinality() > 0) {
var tmp = new char[n][length - gappedSites.cardinality()];
for (var i = 0; i < n; i++) {
var pos = 0;
for (var j = gappedSites.nextClearBit(0); j != -1 && j < length; j = gappedSites.nextClearBit(j + 1)) {
tmp[i][pos++] = alignment[i][j];
}
}
alignment = tmp;
length -= gappedSites.cardinality();
if (length == 0)
return "No ungapped sites found";
}
}

var S = calculateSegregatingSites(alignment, length);
var a1 = sumOfInverses(n - 1);
var a2 = sumOfInverseSquares(n - 1);
var b1 = (n + 1) / (3.0 * (n - 1));
var b2 = 2.0 * (n * n + n + 3) / (9.0 * n * (n - 1));
var c1 = b1 - 1.0 / a1;
var c2 = b2 - (n + 2.0) / (a1 * n) + a2 / (a1 * a1);
var e1 = c1 / a1;
var e2 = c2 / (a1 * a1 + a2);
var variance = e1 * S + e2 * S * (S - 1);

var wattersonEstimator = S / a1;
var piEstimator = calculatePiEstimator(charactersBlock.getMatrix());

var D = (piEstimator - wattersonEstimator) / Math.sqrt(variance);

String text;
if (D <= -2.0)
text = "indicates expanding population";
else if (D >= 2)
text = "indicates declining population";
else text = "test is inconclusive";

return "Tajima's D = %.1f - %s (calculation based on %d sites)%n".formatted(D, text, length);
}

@Override
public boolean isApplicable(TaxaBlock taxa, CharactersBlock datablock) {
return datablock.getDataType().isNucleotides();
}

private static int calculateSegregatingSites(char[][] alignment, int length) {
var s = 0;
for (var j = 0; j < length; j++) {
if (isSegregatingSite(alignment, j)) {
s++;
}
}
return s;
}

private static boolean isSegregatingSite(char[][] alignment, int column) {
var base = alignment[0][column];
for (var i = 1; i < alignment.length; i++) {
if (alignment[i][column] != base) {
return true;
}
}
return false;
}

private static double sumOfInverses(int n) {
var sum = 0.0;
for (var i = 1; i <= n; i++) {
sum += 1.0 / i;
}
return sum;
}

private static double sumOfInverseSquares(int n) {
var sum = 0.0;
for (var i = 1; i <= n; i++) {
sum += 1.0 / (i * i);
}
return sum;
}

public static double calculatePiEstimator(char[][] alignment) {
var n = alignment.length;
var length = alignment[0].length;
var totalPairwiseDifferences = 0.0;

for (var i = 0; i < n; i++) {
for (var j = i + 1; j < n; j++) {
totalPairwiseDifferences += calculatePairwiseDifferences(alignment[i], alignment[j], length);
}
}

// Divide by total number of pairwise comparisons
int numComparisons = n * (n - 1) / 2;
return totalPairwiseDifferences / numComparisons;
}

private static int calculatePairwiseDifferences(char[] seq1, char[] seq2, int length) {
var differences = 0;
for (var i = 0; i < length; i++) {
if (seq1[i] != seq2[i]) {
differences++;
}
}
return differences;
}

public BooleanProperty optionExcludeGapSitesProperty() {
return optionExcludeGapSites;
}
}

0 comments on commit dc53782

Please sign in to comment.