# G1_Allometry.R
# Original file name: "سيمون صبري (4 بايو) G1_Allometry.R"
# Reads, cleans, summarises and plots the G1_Allometry dataset.
# Read dataset ----
# Empty strings in the CSV are read as NA.
mydata <- read.csv(
  file = "c:/Users/RC/Desktop/myfile/G1_Allometry.csv",
  na.strings = ""
)
mydata

# Peek at both ends of the data (head/tail show 6 rows unless told otherwise)
head(mydata)
head(mydata, n = 10)
tail(mydata)
tail(mydata, n = 10)

# Overall description of the data frame
summary(mydata)  # per-column summary statistics
str(mydata)      # column types and a preview of the values
dim(mydata)      # number of rows and columns
View(mydata)     # open the data frame in the data viewer

# Size and labels
nrow(mydata)       # number of rows
ncol(mydata)       # number of columns
colnames(mydata)   # column names
row.names(mydata)  # row names
# Cleaning data ----
# Remove stray quote characters (and their mis-decoded byte sequence) from the
# height column so the remaining text can be parsed as a number.
mydata$height <- gsub("“", "", mydata$height)
mydata$height <- gsub("â€", "", mydata$height)
View(mydata)
# Convert data types ----
# NOTE(review): as.integer() truncates any fractional part; switch to
# as.numeric() if the measurements should keep their decimals.
mydata$height <- as.integer(mydata$height)
mydata$branchmass <- as.integer(mydata$branchmass)
# Convert leafarea from its own values (previously it was overwritten with
# branchmass, which destroyed the leaf-area measurements).
mydata$leafarea <- as.integer(mydata$leafarea)
str(mydata)
# Drop NA values ----
library(tidyr)
cleaned_data <- drop_na(mydata)  # keep only rows with no NA in any column
is.na(cleaned_data)              # check the cleaned copy cell by cell
View(cleaned_data)
# Delete rows with missing values ----
mydata[!complete.cases(mydata), ]  # rows with at least one NA
mydata[is.na(mydata$height), ]     # rows whose height is NA
mynewData <- mydata[!is.na(mydata$height), ]
View(mynewData)
# Locate NA values ----
complete.cases(mydata)  # FALSE means the row contains NA
# Show all rows that contain missing data
mydata[!complete.cases(mydata), ]
is.na(mydata$height)  # TRUE marks a missing height
# Remove outliers ----
# Drop rows whose diameter is 60 or more. Rows with a missing diameter are
# kept unchanged: indexing with a bare `mydata$diameter < 60` would turn each
# of them into an all-NA row.
mydata <- mydata[is.na(mydata$diameter) | mydata$diameter < 60, ]
mydata
# Median height, ignoring NA values
median(mydata[, "height"], na.rm = TRUE)
# Mean and standard deviation, ignoring NA values so the result is not NA
mean_diameter <- mean(mydata$diameter, na.rm = TRUE)
mean_diameter
sd_leafarea <- sd(mydata$leafarea, na.rm = TRUE)
sd_leafarea
# Identify duplicates (TRUE marks a row that repeats an earlier row)
duplicates <- duplicated(mydata)
duplicates
# Remove duplicates
myuniquedata <- unique(mydata)
# Recoding ----
# Label each tree by diameter class. The classes cover every value without
# gaps or overlap; a missing diameter yields an NA label.
#   diameter <= 30       -> "small diameter"
#   30 < diameter < 60   -> "medium diameter"
#   diameter >= 60       -> "large diameter"
# NOTE(review): the medium range is assumed to be everything between the small
# and large cut-offs; the previous condition (>= 40 & >= 50) left 30-40
# unlabelled. Confirm the intended boundaries.
mydata$diameter1 <- ifelse(
  mydata$diameter <= 30,
  "small diameter",
  ifelse(mydata$diameter < 60, "medium diameter", "large diameter")
)
View(mydata)
# Sorting ----
# Order by species, then by diameter within each species.
mysort <- cleaned_data[order(cleaned_data$species, cleaned_data$diameter), ]
View(mysort)  # data after sorting
# Filter the dataset according to specific criteria ----
# Rows with diameter below 30
filter1 <- cleaned_data[cleaned_data$diameter < 30, ]
View(filter1)
# Rows of species "PIMO", keeping columns 1, 3 and 4 only
# (fixed the misspelled data frame name, which raised "object not found").
filter1 <- cleaned_data[cleaned_data$species == "PIMO", c(1, 3, 4)]
View(filter1)
# Data visualization ----
# Install the tidyverse only when ggplot2 is not available yet, so the script
# does not download and reinstall packages on every run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(ggplot2)
# Keep only rows without any NA so the plot has no missing points
completed_data <- mydata[complete.cases(mydata), ]
# Scatter plot of leaf area against diameter
draw1 <- ggplot(completed_data, aes(x = diameter, y = leafarea)) +
  geom_point()
draw1
# Histogram of diameter ----
# Bars are 7 units wide and filled by species.
draw_hist <- ggplot(data = mydata, mapping = aes(x = diameter, fill = species))
draw_hist +
  geom_histogram(binwidth = 7, alpha = 1) +
  labs(
    title = "The Diameters",
    x = "The number of Diameter",
    y = "The count"
  )
# Bar chart of species counts ----
# One panel per distinct diameter value, using the NA-free copy of the data.
# NOTE(review): branchmass and diameter look like continuous measurements;
# a continuous fill on geom_bar() and a facet per distinct diameter may not
# give the intended picture - confirm, or bin both variables first.
bar1 <- ggplot(data = cleaned_data, mapping = aes(x = species, fill = branchmass))
bar1 +
  geom_bar() +
  facet_wrap(~ diameter) +
  labs(title = "The Branchmass Rate", y = "The Count") +
  theme_light()
# Scatter plot with a smoothed trend ----
# Leaf area against diameter, points coloured by species, with a smoothing
# line drawn without its confidence band.
scatter1 <- ggplot(data = mydata, mapping = aes(x = diameter, y = leafarea))
scatter1 +
  geom_point(mapping = aes(color = species), fill = "white") +
  stat_smooth(se = FALSE) +
  theme_light()