Skip to content

Commit 905e124

Browse files
committed
Added helper function that describes each microarray data set briefly in a single data.frame.
1 parent 20ff1f6 commit 905e124

File tree

4 files changed

+66
-1
lines changed

4 files changed

+66
-1
lines changed

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export(describe_data)

R/datamicroarray.r

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#' A Collection of Small-Sample, High-Dimensional Microarray Data Sets to Assess
2+
#' Machine Learning Algorithms and Models
3+
#'
4+
#' The R package \code{datamicroarray} provides a collection of small-sample,
5+
#' high-dimensional microarray data sets to assess machine learning algorithms
6+
#' and models. We have also included scripts to download, process, and load the
7+
#' original data sets from their sources.
8+
#'
9+
#' A thorough description of each data set, along with additional information, is provided
10+
#' on the project's wiki at \url{https://github.com/ramey/datamicroarray}.
11+
#'
12+
#' @docType package
13+
#' @name datamicroarray
14+
#' @aliases datamicroarray datamicroarray-package package-datamicroarray
15+
NULL
16+

R/describe-data.r

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#' Brief description of all microarray data sets in the 'datamicroarray' package.
#'
#' For all data sets in the \code{datamicroarray} package, we summarize them in a
#' \code{data.frame} with one row per data set.
#'
#' @export
#' @return data.frame describing each data set in the package. The columns of the
#' data frame are:
#' \describe{
#'   \item{author}{The first author's name (character)}
#'   \item{year}{The year that the data set's article was published (integer)}
#'   \item{n}{The sample size (integer)}
#'   \item{p}{The number of features (integer)}
#'   \item{K}{The number of classes (integer)}
#'   \item{Disease}{The disease studied (character); the string \code{"N/A"}
#'     if not applicable}
#' }
#' @examples
#' describe_data()
describe_data <- function() {
  # One entry per data set, in the order: author, year, n, p, K, Disease.
  # Rows are lists (not c(...)) so that the numeric fields are not coerced to
  # character alongside the string fields.
  rows <- list(
    list("alon", 1999L, 62L, 2000L, 2L, "Colon Cancer"),
    list("borovecki", 2005L, 31L, 22283L, 2L, "Huntington's Disease"),
    list("burczynski", 2006L, 127L, 22283L, 3L, "Crohn's Disease"),
    list("chiaretti", 2004L, 111L, 12625L, 2L, "Leukemia"),
    list("chin", 2006L, 118L, 22215L, 2L, "Breast Cancer"),
    list("chowdary", 2006L, 104L, 22283L, 2L, "Breast Cancer"),
    list("christensen", 2009L, 217L, 1413L, 3L, "N/A"),
    list("golub", 1999L, 72L, 7129L, 3L, "Leukemia"),
    list("gordon", 2002L, 181L, 12533L, 2L, "Lung Cancer"),
    list("gravier", 2010L, 168L, 2905L, 2L, "Breast Cancer"),
    list("khan", 2001L, 63L, 2308L, 4L, "SRBCT"),
    list("pomeroy", 2002L, 60L, 7128L, 2L, "CNS Tumor"),
    list("shipp", 2002L, 58L, 6817L, 2L, "Lymphoma"),
    list("singh", 2002L, 102L, 12600L, 2L, "Prostate Cancer"),
    list("sorlie", 2001L, 85L, 456L, 5L, "Breast Cancer"),
    list("su", 2002L, 102L, 5565L, 4L, "N/A"),
    list("subramanian", 2005L, 50L, 10100L, 2L, "N/A"),
    list("tian", 2003L, 173L, 12625L, 2L, "Myeloma"),
    list("west", 2001L, 49L, 7129L, 2L, "Breast Cancer"),
    list("yeoh", 2002L, 248L, 12625L, 6L, "Leukemia")
  )

  # Extract field `j` from every row as a vector of the template's type;
  # vapply() fails loudly if any row holds a value of the wrong type.
  column <- function(j, template) {
    vapply(rows, function(row) row[[j]], template)
  }

  # stringsAsFactors = FALSE keeps 'author' and 'Disease' as character on every
  # R version rather than depending on the session default.
  data.frame(
    author = column(1L, character(1)),
    year = column(2L, integer(1)),
    n = column(3L, integer(1)),
    p = column(4L, integer(1)),
    K = column(5L, integer(1)),
    Disease = column(6L, character(1)),
    stringsAsFactors = FALSE
  )
}
47+
48+
49+

R/help.r

-1
This file was deleted.

0 commit comments

Comments
 (0)