-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwstep_skrypt.R
61 lines (42 loc) · 1.36 KB
/
wstep_skrypt.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Skrypt do ćwiczeń - Erementarz - 6.04.18
# Load the credit data set from the project-relative "dane" directory.
data <- read.csv(
  file = "dane/german_credit_data.csv",
  encoding = "UTF-8"
)

# Table dimensions (rows, columns).
dim(data)

# First rows of the table: an explicit 4 x 11 slice, then the default head().
data[seq_len(4), seq_len(11)]
head(data)

# Class of every column, as a named character vector.
vapply(data, class, character(1))
# Work on a copy so the raw import in `data` stays untouched.
data2 <- data

# Recode Job into a labelled factor. factor() assigns the labels to the
# sorted distinct values of the column, so this requires exactly four
# distinct Job codes.
# NOTE(review): the label order assumes the codes sort as
# non-resident < resident < skilled < highly skilled -- confirm against the
# data dictionary.
data2$Job <- factor(
  x = as.character(data$Job),
  labels = c(
    "unskilled and non-resident",
    "unskilled and resident",
    "skilled",
    "highly skilled"
  )
)
head(data2$Job)

# Recode Risk to numeric codes. as.numeric() does not recode through a
# `labels` argument, and applied directly to a character column (the
# read.csv() default since R 4.0) it yields only NA. Going through a factor
# gives codes in sorted level order: 1 - bad, 2 - good.
data2$Risk <- as.numeric(as.factor(data$Risk))
head(data2$Risk) # 1 - bad, 2 - good

# Summary of the raw table.
summary(data)
head(is.na(data))

# Drop incomplete rows from the recoded copy. Using `data` here would
# discard the Job/Risk recoding above and break the later `Risk = 1` filters.
data2 <- na.omit(data2)
library(sqldf)
# Five oldest applicants, queried from the in-memory data frame `data2`.
zapytanie1 <- "select Age, Sex, Risk from data2 order by Age desc limit 5"
sqldf(zapytanie1)

# Same query run directly against the CSV file: read.csv.sql() applies the
# SQL while reading (the file is exposed as table `file`) and returns the
# result as a data frame.
zapytanie0 <- read.csv.sql(
  "dane/german_credit_data.csv",
  sql = "select Age, Sex, Risk from file order by Age desc limit 5"
)
# The result is already a data frame, not an SQL string, so print it
# instead of passing it to sqldf().
zapytanie0
# Mean age of male applicants whose Risk code equals 1.
zapytanie2 <- paste(
  "select avg(Age) as sredni_wiek",
  "from data2",
  "where Sex='male' and Risk=1"
)
sqldf(zapytanie2)

# Mean age and mean credit amount for every Sex x Risk group.
zapytanie4 <- paste(
  'select Sex,Risk, avg(Age) as [Wiek],',
  'avg("Credit.amount") as [Kwota kredytu]',
  'from data2',
  'group by Sex,Risk'
)
sqldf(zapytanie4)
library(dplyr)
# Plain function-call form: keep only the three columns of interest.
select(data2, Age, Sex, Risk)

## Chained (pipe) form
# Five oldest applicants, mirroring the SQL query
# "order by Age desc limit 5". Sorting must be descending, and head() keeps
# exactly five rows; top_n() without `wt` ranks by the last column (Risk)
# and keeps ties, so it does not return the five oldest rows.
data2 %>%
  select(Age, Sex, Risk) %>%
  arrange(desc(Age)) %>%
  head(5)
# Mean age of male applicants whose Risk code equals 1, written as nested
# calls: filter the rows first, then collapse them to a single mean.
summarise(
  filter(data2, Sex == "male" & Risk == 1),
  sredni_wiek = mean(Age)
)