-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
17 changed files
with
4,920 additions
and
480 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
,,% correct,percentile,t,df,p | ||
Face recognition,CMFT 1,100,72,0.59,26,0.561 | ||
Face recognition,CMFT 2,37,0,-3.40,26,0.002 | ||
Face recognition,CMFT 3,33,2,-2.25,26,0.033 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
"""Compute statistics for comparing individual against group""" | ||
|
||
__version__ = '0.0.1' | ||
__author__ = 'Martin Wegrzyn' | ||
|
||
# import modules objects | ||
from .percentile_scores import * | ||
from .ttest_single import * | ||
from .norm_stats import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
"""Function to make output dataframe""" | ||
|
||
# import required modules | ||
import numpy as np | ||
from scipy import stats | ||
from .percentile_scores import * | ||
from .ttest_single import * | ||
|
||
|
||
def make_percentiles(group_mean, group_std, pat_mean):
    """Convert a patient's raw score into a z-score and percentile.

    The previous implementation drew 1,000,000 random samples from a
    normal distribution (np.random.normal) and scored the patient
    against that simulated group — non-deterministic and only an
    approximation of the analytic answer.  The z-score and percentile
    are computed here directly from the normal distribution instead,
    which is exact and reproducible while keeping the same
    ``(z, percentile)`` return contract.

    Parameters
    ----------
    group_mean : float
        Mean of the control group.
    group_std : float
        Standard deviation of the control group.
    pat_mean : float
        Patient's raw score.

    Returns
    -------
    z : float
        Patient's z-score relative to the control distribution.
    percentile : float
        Patient's percentile score, scaled from 0 to 100.
    """
    # standardize the patient's score against the control distribution
    z = (float(pat_mean) - group_mean) / group_std
    # cumulative normal probability, scaled from 0-1 to 0-100
    percentile = stats.norm.cdf(z) * 100

    return z, percentile
|
||
|
||
def make_tstats_df(df, pat_idx):
    """Run a single-case t-test for every column of *df*.

    Compares the patient's score in each column against the remaining
    rows (the control group) using ``ttest_single``.

    Parameters
    ----------
    df : pandas DataFrame
        One row per participant, one column per task/feature.
        Must contain the patient's row under the label *pat_idx*.
    pat_idx : hashable
        Index label of the patient's row in *df*.

    Returns
    -------
    ttest_df : pandas DataFrame
        One row per column of *df* with formatted 't' (2 decimals),
        'df' (int) and 'p' (3 decimals) values, columns in that order.
    """
    # local import: this module does not import pandas at the top;
    # previously `pd` only worked because it leaked in via a star import
    import pandas as pd

    # summary statistics of the control group (patient excluded)
    group_df = df.drop(pat_idx).apply(['mean', 'std', 'count'])
    # raw data of the patient
    pat_df = df.loc[[pat_idx]]
    # hard-code the patient name to be (upper-case) 'JB'
    pat_df.index = ['JB']
    # combine group summary stats and patient raw scores
    stats_df = pd.concat([group_df, pat_df])

    # collect one t-test result per task/feature
    results = {}
    for col in stats_df.columns:
        # get the data necessary to compute a ttest
        pat_mean = stats_df.loc['JB', col]
        group_mean = stats_df.loc['mean', col]
        group_std = stats_df.loc['std', col]
        group_n = stats_df.loc['count', col]
        # compute the ttest; the dof result is deliberately NOT named
        # `df` — the original rebound and shadowed the input dataframe
        t, dof, p = ttest_single(pat_mean, group_mean, group_std, group_n)
        # store formatted values for reporting
        results[col] = {'t': '%.2f' % t, 'df': int(dof), 'p': '%.3f' % p}

    # turn dict into df, with a fixed column order
    ttest_df = pd.DataFrame(results).T
    ttest_df = ttest_df.loc[:, ['t', 'df', 'p']]

    return ttest_df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
"""Functions for computing percentile scores""" | ||
|
||
# import required modules | ||
import pandas as pd | ||
import numpy as np | ||
from scipy import stats | ||
from sklearn import preprocessing | ||
|
||
|
||
# get z scores and transform to percentiles
def get_percentiles(con_data, pat_data):
    """Transform the patient's raw score into a z-score and percentile.

    Standardizes the patient's score with the control sample's mean and
    population standard deviation (ddof=0 — exactly what
    sklearn's StandardScaler computed in the original version) and maps
    the z-value onto the cumulative normal distribution.

    Parameters
    ----------
    con_data : (n, 1) shaped array-like
        All the data of the controls.
    pat_data : (1, 1) shaped array-like
        Data of the patient.

    Returns
    -------
    z : float
        Patient's z-score relative to the controls.
    percentile : float
        Patient's percentile score, scaled from 0 to 100.
    """
    # flatten inputs; the patient's single value is the last element
    con = np.asarray(con_data, dtype=float).ravel()
    pat = np.asarray(pat_data, dtype=float).ravel()[-1]
    # same standardization StandardScaler performed: (x - mean) / std(ddof=0)
    z = (pat - con.mean()) / con.std()
    # cumulative normal probability, scaled from 0-1 to 0-100
    percentile = stats.norm.cdf(z) * 100
    return z, percentile
|
||
|
||
# apply to each column of a df
def make_percentile_df(df, pat_idx):
    """Compute the patient's z-score and percentile for every column.

    Applies the get_percentiles function to all columns of a dataframe,
    comparing the patient's value against all other rows (the controls).

    Parameters
    ----------
    df : pandas dataframe
        Table with values; one row per participant, one column per
        task/feature.  Only works for columns whose values are all
        numbers.
    pat_idx : hashable
        Index label of the patient's row in *df*.

    Returns
    -------
    pc_df : pandas dataframe
        Table with 'z' and 'percentile' of the patient,
        one row per column of *df*.
    """
    # collect one result per column, then build the frame once
    # (the original concatenated inside the loop, which is quadratic)
    results = {}
    for col in df.columns:
        this_series = df.loc[:, col]
        # controls = everyone except the patient, as an (n, 1) array
        con_data = this_series.drop(pat_idx).values.reshape(-1, 1)
        # patient's single value as a (1, 1) array
        pat_data = np.array([this_series.loc[pat_idx]]).reshape(-1, 1)
        # transform the patient's value to a z-score / percentile
        z, pc = get_percentiles(con_data, pat_data)
        results[col] = {'z': z, 'percentile': pc}

    # dict of dicts -> dataframe, one row per task/feature
    pc_df = pd.DataFrame(results).T

    return pc_df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
"""Functions for computing t-tests""" | ||
|
||
# import required modules | ||
import numpy as np | ||
from scipy import stats | ||
|
||
# main function
def ttest_single(x_pat, x_group, s_group, n):
    """Compare one person with a sample of multiple persons
    The formula to compute the t-value is as follows::
        (x_pat - x_group) / (s_group * sqrt( ( n+1 )/n ) )
    If the t-value obtained is negative and its magnitude exceeds the one-tailed
    5% critical value for t on n-1 degrees of freedom,
    then it can be concluded that the patient's score is sufficiently
    low to enable rejection of the null hypothesis that it is an observation
    from the scores of the control population
    (the patient is therefore considered to exhibit an impairment on the task in question)
    Parameters
    ----------
    x_pat : float or int
        Patient's score
    x_group : float or int
        Mean of the scores in the control sample
    s_group : float or int
        Standard deviation of scores in the control sample
    n : int
        The size of the control sample
    Returns
    -------
    t : float
        t-value for the comparison of patient with controls
    df : int
        Degrees of freedom, computed as n-1
    p : float
        two-tailed p-value for t
    Notes
    -----
    This function applies the formula described in
    Crawford, J. R., Garthwaite, P. H., & Howell, D. C. (2009).
    On comparing a single case with a control sample: An alternative perspective.
    Neuropsychologia, 47(13), 2690-2695.
    Examples
    --------
    Say that the patient and the group have equal means
    and the sample size is 100. Then the following results
    should be produced:
    t-value is zero (no difference at all)
    df's are 99 (sample size of 100 minus 1)
    p-value is one (not significant at all)
    >>> 't=%.10f, df=%.1f, p=%.10f'%ttest_single(10,10,1,100)
    't=0.0000000000, df=99.0, p=1.0000000000'
    Say that the patient is two standard deviations below
    the controls. Then, - given a large enough sample -
    the result should be significant:
    >>> 't=%.10f, df=%.1f, p=%.10f'%ttest_single(8,10,1,100)
    't=-1.9900743804, df=99.0, p=0.0493404337'
    In too small a sample, the difference will not be significant
    >>> 't=%.10f, df=%.1f, p=%.10f'%ttest_single(8,10,1,10)
    't=-1.9069251785, df=9.0, p=0.0888979325'
    Nor will a smaller difference be significant in a large sample
    >>> 't=%.10f, df=%.1f, p=%.10f'%ttest_single(8.1,10,1,100)
    't=-1.8905706614, df=99.0, p=0.0616060466'
    """

    # numerator of the formula: patient's deviation from the group mean
    num = (float(x_pat) - x_group)
    # denominator of the formula: group sd inflated for the extra
    # uncertainty of comparing a single case against a finite sample
    denom = (s_group * np.sqrt((n + 1.0) / n))

    # t-value
    t = num / denom

    # degrees of freedom
    df = n - 1

    # two-sided p-value (more conservative than the one-sided p suggested in the cited article)
    p = stats.t.sf(np.abs(t), df) * 2

    return t, df, p
|
||
# run the doctests only when this file is executed directly;
# previously they ran as an import-time side effect on every import
if __name__ == '__main__':
    import doctest
    doctest.run_docstring_examples(ttest_single, globals(), verbose=False, name='ttest_single')
Oops, something went wrong.