-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdhfr-data-understanding.R
91 lines (64 loc) · 2 KB
/
dhfr-data-understanding.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
####################################
# Data Professor #
# http://youtube.com/dataprofessor #
# http://github.com/dataprofessor #
####################################
#########################
# Loading DHFR data set
#########################

# The dhfr data set is distributed with the caret package (it is not part of
# base R's datasets package), so caret has to be installed first.
# install.packages("caret")

# Method 1: attach caret and load its bundled copy of dhfr
library(caret)
data(dhfr)

# Method 2: load the same data set without attaching caret
#data("dhfr", package = "caret")

# Method 3: read the CSV copy hosted on GitHub.
# read.csv() opens the https URL directly, so RCurl::getURL() is not required.
# stringsAsFactors = TRUE turns text columns into factors (since R 4.0 the
# default is character); the colour-by-Y scatter plot and the box-plot
# feature plot further down need Y as a factor.
# NOTE: this overwrites the dhfr object loaded by Method 1.
dhfr <- read.csv(
  "https://github.com/dataprofessor/data/raw/master/dhfr.csv",
  stringsAsFactors = TRUE
)

# View the data (spreadsheet-style viewer; only meaningful interactively)
if (interactive()) {
  View(dhfr)
}
#############################
# Display summary statistics
#############################

# First and last five rows
head(dhfr, n = 5)
tail(dhfr, n = 5)

# Per-column summary of the whole table, then of the Y column alone
summary(dhfr)
summary(dhfr[["Y"]])

# Total number of missing cells across the data frame
sum(is.na(dhfr))

# skimr::skim() - a richer alternative to summary()
# install.packages("skimr")
# https://github.com/ropensci/skimr
library(skimr)

# Summary statistics for the full data set
skim(dhfr)

# Same statistics, computed separately for each level of Y
# (biological activity)
dhfr %>%
  dplyr::group_by(Y) %>%
  skim()
#############################
# Quick data visualization
#
# R base plot()
#############################

# Panel plots
#plot(dhfr)
#plot(iris, col = "red")

# Scatter plot
plot(dhfr$moe2D_zagreb, dhfr$moe2D_weinerPol)
plot(dhfr$moe2D_zagreb, dhfr$moe2D_weinerPol, col = "red") # Makes red circles

# Color by Y. as.factor() guarantees integer-coded classes: a plain character
# column would be interpreted as colour names and fail.
plot(dhfr$moe2D_zagreb, dhfr$moe2D_weinerPol, col = as.factor(dhfr$Y))

# Red circles plus explicit x and y axis labels
plot(dhfr$moe2D_zagreb, dhfr$moe2D_weinerPol, col = "red",
     xlab = "moe2D_zagreb", ylab = "moe2D_weinerPol")

# Histogram
hist(dhfr$moe2D_zagreb)
hist(dhfr$moe2D_zagreb, col = "red") # Makes red bars

# Feature plots
# https://www.machinelearningplus.com/machine-learning/caret-package/
# featurePlot() comes from caret, so the package must be attached before the
# call; strip.custom() is referenced explicitly from lattice.
# install.packages("caret")
library(caret)

# Box plots of columns 2 to 21, one panel per column, grouped by Y.
# Free x/y scales let every panel use its own axis range.
featurePlot(x = dhfr[, 2:21],
            y = as.factor(dhfr$Y),
            plot = "box",
            strip = lattice::strip.custom(par.strip.text = list(cex = .7)),
            scales = list(x = list(relation = "free"),
                          y = list(relation = "free")))