@comment{corrmeasbib.bib -- references on correlation and dependence measures. (Scraped page header and line-number gutter removed; everything outside @entries is ignored by BibTeX.)}
@article{szekely_measuring_2007,
title = {Measuring and testing dependence by correlation of distances},
volume = {35},
issn = {0090-5364},
url = {http://projecteuclid.org/euclid.aos/1201012979},
doi = {10.1214/009053607000000505},
abstract = {Distance correlation is a new measure of dependence between random vectors. Distance covariance and distance correlation are analogous to product-moment covariance and correlation, but unlike the classical definition of correlation, distance correlation is zero only if the random vectors are independent. The empirical distance dependence measures are based on certain Euclidean distances between sample elements rather than sample moments, yet have a compact representation analogous to the classical covariance and correlation. Asymptotic properties and applications in testing independence are discussed. Implementation of the test and Monte Carlo results are also presented.},
language = {EN},
number = {6},
urldate = {2013-04-03},
journal = {The Annals of Statistics},
author = {Sz{\'e}kely, G{\'a}bor J. and Rizzo, Maria L. and Bakirov, Nail K.},
month = dec,
year = {2007},
note = {Mathematical Reviews number ({MathSciNet}): {MR2382665}; Zentralblatt {MATH} identifier: 1129.62059},
pages = {2769--2794}
}
@article{reshef_detecting_2011,
title = {Detecting Novel Associations in Large Data Sets},
volume = {334},
issn = {0036-8075, 1095-9203},
url = {http://www.sciencemag.org/content/334/6062/1518},
doi = {10.1126/science.1205438},
abstract = {Identifying interesting relationships between pairs of variables in large data sets is increasingly important. Here, we present a measure of dependence for two-variable relationships: the maximal information coefficient ({MIC}). {MIC} captures a wide range of associations both functional and not, and for functional relationships provides a score that roughly equals the coefficient of determination (R2) of the data relative to the regression function. {MIC} belongs to a larger class of maximal information-based nonparametric exploration ({MINE}) statistics for identifying and classifying relationships. We apply {MIC} and {MINE} to data sets in global health, gene expression, major-league baseball, and the human gut microbiota and identify known and novel relationships.},
language = {en},
number = {6062},
urldate = {2013-04-03},
journal = {Science},
author = {Reshef, David N. and Reshef, Yakir A. and Finucane, Hilary K. and Grossman, Sharon R. and McVean, Gilean and Turnbaugh, Peter J. and Lander, Eric S. and Mitzenmacher, Michael and Sabeti, Pardis C.},
month = dec,
year = {2011},
pages = {1518--1524},
file = {Full Text PDF:C:\Users\mclark19\AppData\Roaming\Mozilla\Firefox\Profiles\kfv56tr7.default\zotero\storage\EBKX8EMB\Reshef et al. - 2011 - Detecting Novel Associations in Large Data Sets.pdf:application/pdf;Snapshot:C:\Users\mclark19\AppData\Roaming\Mozilla\Firefox\Profiles\kfv56tr7.default\zotero\storage\BF4XDSF5\1518.html:text/html}
}
@article{fujita_comparing_2009,
title = {Comparing {P}earson, {S}pearman and {H}oeffding's {D} measure for gene expression association analysis},
volume = {7},
issn = {0219-7200},
abstract = {{DNA} microarrays have become a powerful tool to describe gene expression profiles associated with different cellular states, various phenotypes and responses to drugs and other extra- or intra-cellular perturbations. In order to cluster co-expressed genes and/or to construct regulatory networks, definition of distance or similarity between measured gene expression data is usually required, the most common choices being Pearson's and Spearman's correlations. Here, we evaluate these two methods and also compare them with a third one, namely Hoeffding's D measure, which is used to infer nonlinear and non-monotonic associations, i.e. independence in a general sense. By comparing three different variable association approaches, namely Pearson's correlation, Spearman's correlation and Hoeffding's D measure, we aimed at assessing the most appropriate one for each purpose. Using simulations, we demonstrate that the Hoeffding's D measure outperforms Pearson's and Spearman's approaches in identifying nonlinear associations. Our results demonstrate that Hoeffding's D measure is less sensitive to outliers and is a more powerful tool to identify nonlinear and non-monotonic associations. We have also applied Hoeffding's D measure in order to identify new putative genes associated with tp53. Therefore, we propose the Hoeffding's D measure to identify nonlinear associations between gene expression profiles.},
language = {eng},
number = {4},
journal = {Journal of Bioinformatics and Computational Biology},
author = {Fujita, Andr{\'e} and Sato, Jo{\~a}o Ricardo and Demasi, Marcos Angelo Almeida and Sogayar, Mari Cleide and Ferreira, Carlos Eduardo and Miyano, Satoru},
month = aug,
year = {2009},
note = {{PMID}: 19634197},
keywords = {Algorithms, Cluster Analysis, Computer Simulation, Gene Expression Profiling, Models, Biological, Proteome, Signal Transduction},
pages = {663--684}
}
@article{reshef_equitability_2013,
title = {Equitability Analysis of the Maximal Information Coefficient, with Comparisons},
url = {http://arxiv.org/abs/1301.6314},
abstract = {A measure of dependence is said to be equitable if it gives similar scores to equally noisy relationships of different types. Equitability is important in data exploration when the goal is to identify a relatively small set of strongest associations within a dataset as opposed to finding as many non-zero associations as possible, which often are too many to sift through. Thus an equitable statistic, such as the maximal information coefficient ({MIC}), can be useful for analyzing high-dimensional data sets. Here, we explore both equitability and the properties of {MIC}, and discuss several aspects of the theory and practice of {MIC}. We begin by presenting an intuition behind the equitability of {MIC} through the exploration of the maximization and normalization steps in its definition. We then examine the speed and optimality of the approximation algorithm used to compute {MIC}, and suggest some directions for improving both. Finally, we demonstrate in a range of noise models and sample sizes that {MIC} is more equitable than natural alternatives, such as mutual information estimation and distance correlation.},
urldate = {2013-04-17},
journal = {arXiv:1301.6314},
eprint = {1301.6314},
archiveprefix = {arXiv},
author = {Reshef, David and Reshef, Yakir and Mitzenmacher, Michael and Sabeti, Pardis},
month = jan,
year = {2013},
keywords = {Computer Science - Learning, Quantitative Biology - Quantitative Methods, Statistics - Machine Learning},
file = {1301.6314 PDF:C:\Users\mclark19\AppData\Roaming\Mozilla\Firefox\Profiles\kfv56tr7.default\zotero\storage\NMGJ4HR5\Reshef et al. - 2013 - Equitability Analysis of the Maximal Information C.pdf:application/pdf;arXiv.org Snapshot:C:\Users\mclark19\AppData\Roaming\Mozilla\Firefox\Profiles\kfv56tr7.default\zotero\storage\F2MM3RUN\1301.html:text/html}
}
@article{kinney_equitability_2013,
title = {Equitability, mutual information, and the maximal information coefficient},
url = {http://arxiv.org/abs/1301.7745},
abstract = {Reshef et al. recently proposed a new statistical measure, the "maximal information coefficient" ({MIC}), for quantifying arbitrary dependencies between pairs of stochastic quantities. {MIC} is based on mutual information, a fundamental quantity in information theory that is widely understood to serve this need. {MIC}, however, is not an estimate of mutual information. Indeed, it was claimed that {MIC} possesses a desirable mathematical property called "equitability" that mutual information lacks. This was not proven; instead it was argued solely through the analysis of simulated data. Here we show that this claim, in fact, is incorrect. First we offer mathematical proof that no (non-trivial) dependence measure satisfies the definition of equitability proposed by Reshef et al.. We then propose a self-consistent and more general definition of equitability that follows naturally from the Data Processing Inequality. Mutual information satisfies this new definition of equitability while {MIC} does not. Finally, we show that the simulation evidence offered by Reshef et al. was artifactual. We conclude that estimating mutual information is not only practical for many real-world applications, but also provides a natural solution to the problem of quantifying associations in large data sets.},
urldate = {2013-04-18},
journal = {arXiv:1301.7745},
eprint = {1301.7745},
archiveprefix = {arXiv},
author = {Kinney, Justin B. and Atwal, Gurinder S.},
month = jan,
year = {2013},
keywords = {Mathematics - Statistics Theory, Quantitative Biology - Quantitative Methods, Statistics - Machine Learning, Statistics - Methodology},
file = {1301.7745 PDF:C:\Users\mclark19\AppData\Roaming\Mozilla\Firefox\Profiles\kfv56tr7.default\zotero\storage\9W2AH99U\Kinney and Atwal - 2013 - Equitability, mutual information, and the maximal .pdf:application/pdf;arXiv.org Snapshot:C:\Users\mclark19\AppData\Roaming\Mozilla\Firefox\Profiles\kfv56tr7.default\zotero\storage\DMSPDVBS\1301.html:text/html}
}
@article{heller_consistent_2012,
title = {A consistent multivariate test of association based on ranks of distances},
url = {http://arxiv.org/abs/1201.3522},
doi = {10.1093/biomet/ass070},
abstract = {We are concerned with the detection of associations between random vectors of any dimension. Few tests of independence exist that are consistent against all dependent alternatives. We propose a powerful test that is applicable in all dimensions and is consistent against all alternatives. The test has a simple form and is easy to implement. We demonstrate its good power properties in simulations and on examples.},
urldate = {2013-04-19},
journal = {arXiv:1201.3522},
eprint = {1201.3522},
archiveprefix = {arXiv},
author = {Heller, Ruth and Heller, Yair and Gorfine, Malka},
month = jan,
year = {2012},
note = {Biometrika 2012},
keywords = {Statistics - Methodology},
file = {1201.3522 PDF:C:\Users\mclark19\AppData\Roaming\Mozilla\Firefox\Profiles\kfv56tr7.default\zotero\storage\84BJJ3BM\Heller et al. - 2012 - A consistent multivariate test of association base.pdf:application/pdf;arXiv.org Snapshot:C:\Users\mclark19\AppData\Roaming\Mozilla\Firefox\Profiles\kfv56tr7.default\zotero\storage\N4AESNDF\1201.html:text/html}
}
@article{szekely_brownian_2009,
title = {Brownian distance covariance},
volume = {3},
issn = {1932-6157},
url = {http://projecteuclid.org/euclid.aoas/1267453933},
doi = {10.1214/09-AOAS312},
abstract = {Distance correlation is a new class of multivariate dependence coefficients applicable to random vectors of arbitrary and not necessarily equal dimension. Distance covariance and distance correlation are analogous to product-moment covariance and correlation, but generalize and extend these classical bivariate measures of dependence. Distance correlation characterizes independence: it is zero if and only if the random vectors are independent. The notion of covariance with respect to a stochastic process is introduced, and it is shown that population distance covariance coincides with the covariance with respect to Brownian motion; thus, both can be called Brownian distance covariance. In the bivariate case, Brownian covariance is the natural extension of product-moment covariance, as we obtain Pearson product-moment covariance by replacing the Brownian motion in the definition with identity. The corresponding statistic has an elegantly simple computing formula. Advantages of applying Brownian covariance and correlation vs the classical Pearson covariance and correlation are discussed and illustrated.},
language = {EN},
number = {4},
urldate = {2013-04-19},
journal = {The Annals of Applied Statistics},
author = {Sz{\'e}kely, G{\'a}bor J. and Rizzo, Maria L.},
month = dec,
year = {2009},
note = {Mathematical Reviews number ({MathSciNet}): {MR2752127}},
pages = {1236--1265}
}
@misc{simontibs_2011,
title = {Comment on ``{Detecting Novel Associations in Large Data Sets}'' by {Reshef} et al.},
author = {Simon, Noah and Tibshirani, Robert},
url = {http://www-stat.stanford.edu/~tibs/reshef/comment.pdf},
month = dec,
year = {2011},
pages = {1--3}
}