-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmulti_distr.R
118 lines (99 loc) · 4.08 KB
/
multi_distr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#' ---
#' title: "Multivariate data"
#' author: "Yohan"
#' output:
#' html_document:
#' preserve_yaml: true
#' toc: true
#' toc_float: true
#' keep_md: true
#' ---
#+ r setup, include = FALSE
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
library(gapminder)
library(dplyr)
library(ggplot2)
library(tidyr)
library(tidyverse)
library(corrplot)
library(lattice)
library(GGally)
library(scatterplot3d)
library(BDgraph)
library(mvtnorm)
library(MVN)
library(sn)
#' Multivariate normality is required for regression, model-based clustering, PCA and ANOVA. Then how to test? qqplot (qqnorm, qqline) to detect heavier tail, skewness, outliers, and clustered data. If any single variable fails to be a normality, we can not have joint multivariate.
#+ r
wine <- read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data", sep = ",")
View(wine)
colnames(wine) <- c('Type', 'Alcohol', 'Malic', 'Ash', 'Alcalinity', 'Magnesium', 'Phenols', 'Flavanoids', 'Nonflavanoids','Proanthocyanins', 'Color', 'Hue', 'Dilution', 'Proline')
wine$Type <- as.factor(wine$Type)
#' ## plots
var.wine <- var(wine[2:5])
cor.wine <- cor(wine[, 2:5])
corrplot(cor.wine, method = "ellipse")
pairs(wine[,2:5])
splom( ~ wine[,2:5], pch = 16, col = wine$Type)
(wine.gg <- ggpairs(data = wine, columns = 2:5))
scatterplot3d(wine[, c(2, 3, 5)], color = wine$Type, angle = 70)
#' ## Multivariate normal samples
mu.sim <- c(2, -2)
sigma.sim <- matrix(c(9,5,5,4), 2,2)
multnorm.sample <- rmvnorm(n = 100, mean = mu.sim, sigma = sigma.sim)
head(multnorm.sample)
plot(multnorm.sample)
#' ## Density
multnorm.dens <- dmvnorm(multnorm.sample, mean = mu.sim, sigma = sigma.sim)
scatterplot3d(cbind(multnorm.sample, multnorm.dens),
color="blue", pch="", type = "h",
xlab = "x", ylab = "y", zlab = "density")
mvals <- expand.grid(seq(-5, 10, length.out = 40), seq(-8, 4, length.out = 40))
mvds <- dmvnorm(x = mvals, mean = mu.sim, sigma = sigma.sim)
matrix_mvds <- matrix(mvds, nrow = 40)
persp(matrix_mvds, theta = 80, phi = 30, expand = 0.6, shade = 0.2, col = "lightblue", xlab = "x", ylab = "y", zlab = "dens")
#' ## Probability
pmvnorm(lower = c(-1, -1), upper = c(1, 1))
pmvnorm(lower = c(-5, -5), upper = c(5, 5), mean = mu.sim, sigma = sigma.sim)
#' ## Quantile
qmvnorm(0.9, tail = "both", sigma = diag(2))
qmvnorm(0.95, tail = "both", mean = mu.sim, sigma = sigma.sim)
#' ## Normality test
qqnorm(multnorm.sample[, 1])
qqline(multnorm.sample[, 1])
mvn(multnorm.sample)
mvn(wine[, 2:5])
#' ## t distribution (e.g. financial stock time series)
#' ### rmvt, dmvt, qmvt, pmvt
multt.sample <- rmvt(n = 200,sigma = sigma.sim, df = 5, delta = mu.sim)
mvn(multt.sample, multivariatePlot = "qq")
multt.dens <- dmvt(x = multt.sample, delta = mu.sim, sigma = sigma.sim, df = 5, log = F)
scatterplot3d(cbind(multt.sample, multt.dens),
color = "blue", pch = "", type = "h",
xlab = "x", ylab = "y", zlab = "density")
pmvt(lower = c(-5, -5), upper = c(5, 5),
delta = mu.sim, df = 5, sigma = sigma.sim)
# CDF, e.g., Probability for all 3 stocks between $100 and 200.
qmvt(p = 0.9, tail = "both", sigma = diag(2)) # inverse CDF, showing the circle of radius for 90%
#' ## skew distribution
skewnorm.sample <- rmsn(n = 100, xi = mu.sim, Omega = sigma.sim, alpha = c(4, -4))
ggplot(as.data.frame(skewnorm.sample), aes(x = V1, y = V2)) +
geom_point() +
geom_density_2d()
mvn(skewnorm.sample, multivariatePlot = "qq")
xi <- c(1,2,-5)
omega <- matrix(c(1,1,0,
1,2,0,
0,0,5), 3,3)
alpha <- c(4,30,-5)
skew.s <- rmsn(n = 2000, xi = xi, Omega = omega, alpha = alpha)
ggpairs(data = as.data.frame(skew.s))
msn.mle(y = skew.s, opt.method = "BFGS")
skewt.s <- rmst(n = 2000, xi = xi, Omega = omega, alpha = alpha, nu = 4)
ggpairs(data = as.data.frame(skewt.s))
msn.mle(y = skewt.s, opt.method = "BFGS")
#' ## Multi dimentional scaling (MDS)
state.dist <- dist(wine[-1])
mds.state <- cmdscale(state.dist, k=3)
mds.state_df <- data.frame(mds.state)
scatterplot3d(mds.state_df, color = wine$Type, pch = 19, type = "h", lty.hplot = 2)