Skip to content

Commit

Permalink
Merge pull request #51 from compneurobilbao/feature-clinical-stat-mea…
Browse files Browse the repository at this point in the history
…sures

ENH: Adds more statistics to `clinical_groups` delta t-tests
  • Loading branch information
itellaetxe authored Sep 20, 2024
2 parents 14c3893 + 0718341 commit c02c478
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 3 deletions.
18 changes: 18 additions & 0 deletions src/ageml/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,21 @@ def covariate_correction(X, Z, beta=None):
X_residual = X - Z @ beta

return X_residual, beta


def cohen_d(group1, group2):
    """Return Cohen's d effect size between two samples.

    The effect size is the difference of the sample means
    (``group1`` minus ``group2``) divided by the pooled standard
    deviation, where each group's variance is the unbiased sample
    variance (``ddof=1``) weighted by its degrees of freedom.

    Parameters
    ----------
    group1, group2: sized sequences/arrays of numeric observations.

    Returns
    -------
    d: Cohen's d; positive when the mean of ``group1`` is larger.
    """
    size_a = len(group1)
    size_b = len(group2)

    # Degrees-of-freedom-weighted variance of each sample
    weighted_var_a = (size_a - 1) * np.var(group1, ddof=1)
    weighted_var_b = (size_b - 1) * np.var(group2, ddof=1)

    # Pooled standard deviation across both samples
    pooled_dof = size_a + size_b - 2
    pooled_std = np.sqrt((weighted_var_a + weighted_var_b) / pooled_dof)

    # Standardised mean difference
    return (np.mean(group1) - np.mean(group2)) / pooled_std
30 changes: 27 additions & 3 deletions src/ageml/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@

from datetime import datetime
from statsmodels.stats.multitest import multipletests
from statsmodels.stats.weightstats import CompareMeans, DescrStatsW
import scipy.stats as stats

import ageml.messages as messages
from ageml.visualizer import Visualizer
from ageml.utils import create_directory, feature_extractor, significant_markers, convert, log, NameTag
from ageml.utils import (create_directory, feature_extractor,
significant_markers, convert, log, NameTag)
from ageml.modelling import AgeML, Classifier
from ageml.processing import find_correlations, covariate_correction
from ageml.processing import find_correlations, covariate_correction, cohen_d


class Interface:
Expand Down Expand Up @@ -964,14 +966,30 @@ def deltas_by_group(self, dfs, tag, significance: float = 0.05):

# Obtain statistically significant difference between deltas
print("Checking for statistically significant differences between deltas...")
header = ("p-val (group1 vs. group2) "
"Cohen\'s d "
"CI (95%): [low, upp] "
"Delta difference(mean1 - mean2)")
print(len(str(header)) * "=")
print("significance: %.2g * -> FDR, ** -> bonferroni" % significance)
print(str(header))
print(len(str(header)) * "-")

# Calculate p-values
p_vals_matrix = np.zeros((len(deltas), len(deltas)))
effect_size_matrix = np.zeros((len(deltas), len(deltas)))
conf_intervals = np.zeros((len(deltas), len(deltas), 2)) # Lower & upper bounds

for i in range(len(deltas)):
for j in range(i + 1, len(deltas)):
_, p_val = stats.ttest_ind(deltas[i], deltas[j])
p_vals_matrix[i, j] = p_val
effect_size_matrix[i, j] = cohen_d(deltas[i], deltas[j])
# Compute confidence interval for the mean difference
cm = CompareMeans(DescrStatsW(deltas[i]), DescrStatsW(deltas[j]))
# We use unequal variance, Welch's test and Satterthwaite's dofs
ci_low, ci_upp = cm.tconfint_diff(alpha=significance, usevar='unequal')
conf_intervals[i, j] = [ci_low, ci_upp]

# Reject null hypothesis of no difference between group means
reject_bon, _, _, _ = multipletests(p_vals_matrix.flatten(), alpha=significance, method='bonferroni')
Expand All @@ -984,10 +1002,16 @@ def deltas_by_group(self, dfs, tag, significance: float = 0.05):
for i in range(len(deltas)):
significant = significant_markers(reject_bon[i], reject_fdr[i])
for j in range(i + 1, len(deltas)):
pval_message = "p-value between %s and %s: %.2g" % (
pval_message = "p-val (%s vs. %s): %.2g" % (
labels[i], labels[j], p_vals_matrix[i, j])
if significant[j] != "":
pval_message = significant[j] + " " + pval_message
cohen_d_value = effect_size_matrix[i, j]
delta_difference = np.mean(deltas[i]) - np.mean(deltas[j])
ci_low, ci_upp = conf_intervals[i, j]
ci_p = 100 * (1 - significance) # Confidence interval percentage
pval_message = f"{pval_message:<35} Cohen's d: {cohen_d_value:<10.2f} CI ({ci_p}%): [{ci_low:<6.2f}, {ci_upp:<6.2f}]"
pval_message += f" Delta difference: {delta_difference:<15.2f}"
print(pval_message)

# Use visualizer
Expand Down
2 changes: 2 additions & 0 deletions src/ageml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import os
import sys

import numpy as np


def insert_newlines(text, nwords):
"""Function to insert a new line every n words."""
Expand Down

0 comments on commit c02c478

Please sign in to comment.