-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2021_11.R
75 lines (53 loc) · 2.37 KB
/
2021_11.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# import library
library(tidyverse)
library(lubridate)
# Load the raw trip data and show a quick overview of its columns.
ride_data <- read.csv("202111-divvy-tripdata.csv")
glimpse(ride_data)
# Convert 'started_at' and 'ended_at' from text to date-time (POSIXct).
# Without an explicit `tz`, as.POSIXct() interprets the text in the timezone
# of the machine running the script, so hour-of-day values and durations can
# change from one machine to another (e.g. around a daylight-saving switch).
# Parsing as UTC keeps the clock values exactly as written in the file.
# NOTE(review): the file is assumed to hold local wall-clock times without an
# offset -- confirm, and switch `tz` if true elapsed time across a
# daylight-saving change matters.
ride_data$started_at <- as.POSIXct(ride_data$started_at, tz = "UTC")
ride_data$ended_at <- as.POSIXct(ride_data$ended_at, tz = "UTC")
# Split the rides by the value of 'member_casual' and store each subset in
# its own .rda file.
# Rows where member_casual is "member":
member_users <- filter(ride_data, member_casual == "member")
save(member_users, file = "member_users_2021_11.rda")
# Rows where member_casual is "casual":
casual_users <- filter(ride_data, member_casual == "casual")
save(casual_users, file = "casual_users_2021_11.rda")
#################
### ALL USERS ###
#################
# Mean ride time, reported first in seconds and then in minutes.
# Subtracting two POSIXct vectors lets R pick the unit automatically (secs,
# mins, hours, ...) from the data, so dividing that result by 60 only yields
# minutes when R happened to pick seconds, and the printed unit label is not
# updated by the division. difftime() with explicit units avoids both issues.
ride_duration <- difftime(
  ride_data$ended_at,
  ride_data$started_at,
  units = "secs"
)
mean(ride_duration) # mean ride time in seconds
units(ride_duration) <- "mins"
mean(ride_duration) # mean ride time in minutes
# Tally rows for each membership type ('member_casual')
count(ride_data, member_casual)
# Tally rows for each bicycle type ('rideable_type')
count(ride_data, rideable_type)
# Count rows per start station (id + name). The table is computed once and
# kept in the order count() returns it for saving and further analysis.
start_station_count <- ride_data %>%
  count(start_station_id, start_station_name)
# Print the 6 stations with the largest counts (descending by n). `max` caps
# the number of entries for this print call only (6 rows x 3 columns), so the
# session-wide `max.print` option is left untouched.
start_station_count %>%
  arrange(desc(n)) %>%
  head(n = 6) %>%
  print(max = 6 * ncol(start_station_count))
# View() opens a data viewer, which is only useful in an interactive session.
if (interactive()) {
  View(start_station_count)
}
save(start_station_count, file = "2021_11_start_station_count.rda")
# Count rows per hour of day of 'started_at'. The column is referenced
# directly (data masking) rather than through `ride_data$`, so the expression
# always refers to the data being piped in. The table is computed once and
# kept in the order count() returns it for saving.
start_time_count <- ride_data %>%
  count(start_hour = hour(started_at))
# Print the 6 hours with the largest counts (descending by n). `max` caps the
# number of entries for this print call only (6 rows x 2 columns), so the
# session-wide `max.print` option is left untouched.
start_time_count %>%
  arrange(desc(n)) %>%
  head(n = 6) %>%
  print(max = 6 * ncol(start_time_count))
# View() opens a data viewer, which is only useful in an interactive session.
if (interactive()) {
  View(start_time_count)
}
save(start_time_count, file = "2021_11_start_time_count.rda")
# Count rows per end station (id + name). The table is computed once and
# kept in the order count() returns it for saving and further analysis.
end_station_count <- ride_data %>%
  count(end_station_id, end_station_name)
# Print the 6 stations with the largest counts (descending by n). `max` caps
# the number of entries for this print call only (6 rows x 3 columns), so the
# session-wide `max.print` option is left untouched.
end_station_count %>%
  arrange(desc(n)) %>%
  head(n = 6) %>%
  print(max = 6 * ncol(end_station_count))
# View() opens a data viewer, which is only useful in an interactive session.
if (interactive()) {
  View(end_station_count)
}
save(end_station_count, file = "2021_11_end_station_count.rda")
# Count rows per hour of day of 'ended_at'. The column is referenced directly
# (data masking) rather than through `ride_data$`, so the expression always
# refers to the data being piped in. The table is computed once and kept in
# the order count() returns it for saving.
end_time_count <- ride_data %>%
  count(end_hour = hour(ended_at))
# Print the 6 hours with the largest counts (descending by n). `max` caps the
# number of entries for this print call only (6 rows x 2 columns), so the
# session-wide `max.print` option is left untouched.
end_time_count %>%
  arrange(desc(n)) %>%
  head(n = 6) %>%
  print(max = 6 * ncol(end_time_count))
# View() opens a data viewer, which is only useful in an interactive session.
if (interactive()) {
  View(end_time_count)
}
save(end_time_count, file = "2021_11_end_time_count.rda")