-
Notifications
You must be signed in to change notification settings - Fork 0
/
lab1_script2.R
107 lines (75 loc) · 2.81 KB
/
lab1_script2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#setwd("C:\Users\st70983\Downloads")
mydata = read.csv("mydata2_21var.csv", header=F)
#save.image("C:\\Users\\st70983\\Downloads\\workspace")
mydata = mydata[, 1]
mydata
plot(mydata, type="l")
h = hist(mydata)
xfit = seq(min(mydata ),max(mydata ),length=100)
yfit = dnorm(xfit,mean=mean(mydata ),sd=sd(mydata ))
yfit = yfit*diff(h$mids[1:2])*length(yfit)
h = hist(mydata, ylim=c(0,max(yfit)))
lines(xfit, yfit, col="red", lwd=2)
# cumulative
plot(ecdf(mydata))
plot(ecdf(mydata), verticals = TRUE, do.points = FALSE)
yfit = rnorm(length(mydata ),mean=mean(mydata),sd=sd(mydata))
plot(ecdf(yfit), verticals = TRUE, do.points = FALSE, add=TRUE, col="red")
############ EXAMPLE
rm(list = ls()) # clean all objects from the workspace
# command ?ls()? shows you names of all used variables
########## LOAD DATA
#sunspot.year = read.csv("sunspot_year.csv") # read data from CSV file
# Since it is an example, the data set is taken from the basic package ?datasets?,
# so you can work with the set just using it name (without reading from the CSV)
#sunspot.year
########## PLOTS
# scatter plot
#plot(sunspot.year)
# histogram
#hist(sunspot.year)
# add normal curve
#h = hist(sunspot.year)
#xfit = seq(min(sunspot.year),max(sunspot.year),length=100)
#yfit = dnorm(xfit,mean=mean(sunspot.year),sd=sd(sunspot.year))
#yfit = yfit*diff(h$mids[1:2])*length(yfit )
#lines(xfit, yfit, col="red", lwd=2)
# cumulative distribution function plot
#plot(ecdf(sunspot.year))
#plot(ecdf(sunspot.year ), verticals = TRUE, do.points = FALSE)
#yfit = rnorm(length(sunspot.year),mean=mean(sunspot.year),sd=sd(sunspot.year))
#plot(ecdf(yfit), verticals = TRUE, do.points = FALSE, add=TRUE, col="red")
########## DESCRIPTIVE STATISTICS
#summary(sunspot.year)
summary(mydata)
sd(mydata)
var(mydata)
# to calculate skewness and kurtosis, you should install and load package ?fBasics?
# (if you want you can use any other package, to get the right result the main thing )
install.packages("fBasics") # install
library("fBasics") # load
skewness(mydata)
kurtosis(mydata)
# Variance for skewness and kurtosis
N = length(mydata)
v_skew = sqrt(6*N*(N-1) / ((N-2)*(N+1)*(N+3))) # must be nearly equal to others
v_kur = sqrt(4*(N^2-1)*v_skew^2 / ((N-3)*(N+5))) # must be less than one
# Confidence interval for mean
t.test(mydata, conf.level=0.9)$conf.int
t.test(mydata, conf.level=0.95)$conf.int
t.test(mydata, conf.level=0.99)$conf.int
########## QUANTILES
# get quantile value
quantile(mydata, 0.9)
quantile(mydata, 0.95)
quantile(mydata, 0.5)
normaldata = seq(min(mydata ),max(mydata ),length=100)
normaldata = dnorm(xfit,mean=mean(mydata ),sd=sd(mydata ))
quantile(normaldata, 0.9)
quantile(normaldata, 0.95)
# get quantile number knowing quantile value
#ecdf(sunspot.year)( 1.5 )
ecdf(mydata)(1.5)
ecdf(mydata)(2)
ecdf(mydata)(8.2)
1 - ecdf(mydata)(10.5)