-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_workshop - script - solutions - ch 3.R
executable file
·128 lines (92 loc) · 2.96 KB
/
_workshop - script - solutions - ch 3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
####--- | solution: LEGO ---####
# Load the LEGO sets data: use the local copy when it exists in the working
# directory, otherwise read it from the course repository. Reading the local
# file unconditionally stops the script when the file is missing, and its
# result used to be overwritten by the online read anyway.
lego_file <- "LEGOsets.csv"
lego_url <- "https://raw.githubusercontent.com/rikvosters/Basics-in-R/master/LEGOsets.csv"
lego <- read.csv(if (file.exists(lego_file)) lego_file else lego_url)
# first six rows
head(lego)
# histogram of the year column
hist(lego$year)
# smallest value in the year column, i.e. the first year
min(lego$year)
# names for the rows where year is 1955
lego$name[lego$year == 1955]
####--- | solution: negation ---####
# read the negation data from the course repository and preview it
neg <- read.csv(
  file = "https://raw.githubusercontent.com/rikvosters/Basics-in-R/master/negation.csv"
)
head(neg, n = 6L)
# base package: frequency table of the Negation column over all rows
table(neg$Negation)
# the same table, restricted to the SOV rows and then to the SVO rows
table(neg[neg$WordOrder == "SOV", ]$Negation)
table(neg[neg$WordOrder == "SVO", ]$Negation)
# tip (cf. later): cross-tabulate Negation by WordOrder in a single call
table(neg$Negation, neg$WordOrder)
####--- | solution: catholic fertility ---####
# attach the datasets package (it ships with R, so there is nothing to install)
library(datasets)
# work on a copy of the swiss data under a new name
helvetica <- swiss
# first ten rows
head(helvetica, n = 10)
# rows for districts where Catholic > 50
subset(helvetica, Catholic > 50)
# mean Fertility where Catholic > 50, compared to where Catholic <= 50
with(helvetica, mean(Fertility[Catholic > 50]))
with(helvetica, mean(Fertility[Catholic <= 50]))
# alternative - store each subset as its own data frame, then average:
helvetica_catolica <- subset(helvetica, Catholic > 50)
helvetica_heretica <- subset(helvetica, Catholic <= 50)
mean(helvetica_catolica[["Fertility"]])
mean(helvetica_heretica[["Fertility"]])
####--- | solution: shark attacks ---####
# The pipelines in this section use dplyr (%>%, filter(), tally(), arrange(),
# select(), mutate()); attach it here so the section runs in a fresh session.
library(dplyr) # once: install.packages("dplyr")
# load: the file is tab-separated; quote = "" turns off quote handling
shark <- read.csv(
  "https://raw.githubusercontent.com/rikvosters/Basics-in-R/master/SharkAttacks.csv",
  sep = "\t",
  quote = ""
)
# explore: print the data frame
shark
# died of shark attack in 2017: count rows with Year 2017 and Fatal "Y"
shark %>%
  filter(Year == 2017, Fatal == "Y") %>%
  tally()
# base package alternative:
# summing the logical condition with na.rm = TRUE skips rows where the
# comparison is NA, just as filter() does; length(shark$Name[condition])
# would count those rows as well.
sum(shark$Year == 2017 & shark$Fatal == "Y", na.rm = TRUE)
# check before and after Jaws (1975): count the rows with Year strictly
# between 1965 and 1975, then strictly between 1975 and 1985
shark %>%
  filter(Year > 1965, Year < 1975) %>%
  tally()
shark %>%
  filter(Year > 1975, Year < 1985) %>%
  tally()
# base package alternative:
# na.rm = TRUE leaves out rows where Year is NA, matching filter();
# length(shark$Name[condition]) would count them.
sum(shark$Year > 1965 & shark$Year < 1975, na.rm = TRUE)
sum(shark$Year > 1975 & shark$Year < 1985, na.rm = TRUE)
# oldest Australian to die: sort the fatal Australian rows by descending Age
# and keep the Age of the first one
shark %>%
  filter(Fatal == "Y", Country == "AUSTRALIA") %>%
  arrange(desc(Age)) %>%
  select(Age) %>%
  head(1)
# same question via max(): pull() extracts Age as a plain vector, so max()
# works on a vector rather than on a one-column data frame
shark %>%
  filter(Fatal == "Y", Country == "AUSTRALIA") %>%
  pull(Age) %>%
  max(na.rm = TRUE)
# base package alternative (TRUE spelled out: T is an ordinary variable that
# can be reassigned):
max(
  shark$Age[shark$Fatal == "Y" & shark$Country == "AUSTRALIA"],
  na.rm = TRUE
)
# names of all 20C NZ victims under 16 (Year after 1900, up to and
# including 2000)
shark %>%
  filter(
    Age < 16,
    Year > 1900,
    Year <= 2000,
    Country == "NEW ZEALAND"
  ) %>%
  select(Name)
# base package alternative:
# the condition includes the Year <= 2000 upper bound so that it selects the
# same rows as the pipeline; which() drops rows where the condition is NA
# instead of returning NA names for them.
nz_under16_20c <- shark$Age < 16 &
  shark$Year > 1900 &
  shark$Year <= 2000 &
  shark$Country == "NEW ZEALAND"
shark$Name[which(nz_under16_20c)]
# the same names with duplicates removed
unique(shark$Name[which(nz_under16_20c)])
# country names to lower case: add a Country_lowercase column to shark
# (the data frame is called shark; no object named haai exists in this
# script)
shark <- shark %>%
  mutate(Country_lowercase = tolower(Country))
head(shark)
# base package alternative: writes the same Country_lowercase column and
# leaves the original Country values untouched, so earlier filters on
# upper-case country names still work
shark$Country_lowercase <- tolower(shark$Country)