-
Notifications
You must be signed in to change notification settings - Fork 0
/
CaseStudy_02Markdown.Rmd
115 lines (101 loc) · 3.21 KB
/
CaseStudy_02Markdown.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
---
title: "DS6306 CaseStudy 02"
author: "Heindel Adu, Eric Fu, Stephen Johnson and Anthony Yueng"
date: "3/21/2019"
output: html_document
---
```{r setup, include=FALSE, message=FALSE}
# Global knitr default for every chunk that follows: echo = TRUE shows each
# chunk's source code alongside its output in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
```
```{r libraryload, message=FALSE}
library(tidyverse)
library(dplyr)
library(plotly)
library(circlize)
library(caret)
library(readr)
```
```{r, data}
# Read the case-study CSV (path is relative to the knitting directory) into
# `cs2data`, the data set used by the rest of the report.
cs2data <- readr::read_csv(file = "CaseStudy2-data.csv")

# First look at the imported data: the tibble itself, its column names,
# the leading rows, and the structure / column types.
cs2data
cs2data %>% names()
cs2data %>% head()
cs2data %>% str()
```
### Exploratory Data Analysis
```{r}
# Cardinality overview: the number of distinct values in every column.
# across() is used instead of the deprecated funs() helper and the superseded
# summarise_all(); the summary is computed once and keeps the column names.
cs2data %>%
  summarise(across(everything(), n_distinct))

# Observed values of the individual columns under inspection, one call per
# column so that each result is printed separately in the report.
unique(cs2data$Age)
unique(cs2data$Attrition)
unique(cs2data$BusinessTravel)
unique(cs2data$Department)
unique(cs2data$DistanceFromHome)
unique(cs2data$Education)
unique(cs2data$EducationField)
unique(cs2data$EnvironmentSatisfaction)
unique(cs2data$Gender)
unique(cs2data$HourlyRate)
unique(cs2data$JobInvolvement)

# The full data set with any repeated rows removed.
unique(cs2data)
```
```{r}
library(plotly)

# Parallel-coordinates plot of cs2data: one line per row, one axis per
# attribute, lines coloured by department.
#
# A parcoords trace needs numeric values for the line colour and for every
# axis, so the character column Department is mapped to integer codes
# (1, 2, ...) in alphabetical order of the department names.
# NOTE(review): the axes below use only columns that are referenced elsewhere
# in this file; confirm they are all numeric in CaseStudy2-data.csv.
dept_levels <- sort(unique(cs2data$Department))

p <- cs2data %>%
  mutate(dept_code = match(Department, dept_levels)) %>%
  plot_ly(width = 1000, height = 600) %>%
  add_trace(
    type = "parcoords",
    # No cmin/cmax: the colour scale is left to span the department codes.
    line = list(
      color = ~dept_code,
      colorscale = "Jet",
      showscale = TRUE,
      reversescale = TRUE
    ),
    dimensions = list(
      list(
        range = c(~min(Age), ~max(Age)),
        constraintrange = c(15, 65),
        label = "Age", values = ~Age
      ),
      list(
        range = c(~min(DailyRate), ~max(DailyRate)),
        label = "Daily Rate", values = ~DailyRate
      ),
      list(
        range = c(~min(HourlyRate), ~max(HourlyRate)),
        label = "Hourly Rate", values = ~HourlyRate
      ),
      list(
        range = c(~min(DistanceFromHome), ~max(DistanceFromHome)),
        label = "Distance From Home", values = ~DistanceFromHome
      ),
      list(
        range = c(~min(Education), ~max(Education)),
        label = "Education", values = ~Education
      ),
      list(
        range = c(~min(EnvironmentSatisfaction), ~max(EnvironmentSatisfaction)),
        label = "Environment Satisfaction", values = ~EnvironmentSatisfaction
      ),
      list(
        range = c(~min(JobInvolvement), ~max(JobInvolvement)),
        label = "Job Involvement", values = ~JobInvolvement
      ),
      # Department axis: integer codes on the axis, names as tick labels.
      list(
        tickvals = seq_along(dept_levels),
        ticktext = dept_levels,
        label = "Department", values = ~dept_code
      )
    )
  )

# Print the widget so that knitr embeds it in the rendered document.
p
```
```{r}
# Toy input for the chord-diagram demo: 22 (name, feature) pairs.
name <- c(
  3, 10, 10, 3, 6, 7, 8, 3, 6, 1, 2,
  2, 6, 10, 2, 3, 3, 10, 4, 5, 9, 10
)
feature <- paste0(
  "feature ",
  c(1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5)
)

# Cross-tabulate the pairs: rows are names, columns are features, and each
# cell counts how often that combination occurs.
dat <- data.frame(name, feature)
dat <- table(name = dat[["name"]], feature = dat[["feature"]])
head(dat)

# Attach circlize, which provides chordDiagram().
library(circlize)

# Draw the chord diagram from the long-form (name, feature, Freq) table.
chordDiagram(x = as.data.frame(dat), transparency = 0.5)
```
Data Visualization
=========================================
Row
----------------------------------------
### Exploratory Data Analysis
```{r}
# Show the number of records in cs2data (the length of its Attrition column)
# in a value box.
# NOTE(review): valueBox() does not appear to be provided by any package
# loaded in this file (presumably it is flexdashboard's) -- confirm that the
# intended package is attached, and that the output format supports value
# boxes, before knitting.
valueBox(length(cs2data$Attrition))
```