-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
include data and add a map to 'distance' graph
- Loading branch information
Showing
16 changed files
with
8,591 additions
and
413 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
# Title: Data Wrangling
# Author: Andi Herlan
# Email: andi.herlan@protonmail.com
# Data Used: travel.csv
# Packages Used: stringr, lubridate, dplyr, tidyr
# Output File: -
# Data Output: gojek.rds
#
# Reads the invoice e-mails in travel.csv, extracts one field per trip from
# the e-mail body with regular expressions (vehicle, distance, duration,
# price components, payment method, pickup, destination, driver) and stores
# the resulting tibble in output/gojek.rds.

library(dplyr)
library(tidyr)
library(stringr)
library(lubridate)

# Convert an amount written with "." as the thousands separator
# (e.g. "12.500" or "1.250.000") to a number. Every separator is removed,
# not only the first one, so amounts of one million and above parse
# correctly. Anything that is not purely digits and dots becomes NA.
parse_amount <- function(x) {
  as.numeric(str_remove_all(x, "\\."))
}

# invoice mail from GOJEK
travel <- read.csv("travel.csv")
content <- travel$body
gojek <- travel %>% as_tibble()

# datetime order == datetime email
gojek$datetime <- as_datetime(gojek$datetime,
                              format = "%m/%d/%Y %H:%M:%S",
                              tz = "Asia/Jakarta")

# ride or car: first "GO-xxx" / "Goxxx" token, reduced to the part after
# the "go" prefix and title-cased
gojek$vehicle <- content %>%
  str_extract("G[Oo]-?\\w{3,4}") %>%
  str_to_lower() %>%
  str_remove("go-?") %>%
  str_to_title()

# distance: first number on the "D..."/"J..." line that ends in " km"
gojek$distance <- content %>%
  str_extract("[DJ]\\w+.+\\skm") %>%
  str_extract("\\d+(\\.\\d+)?") %>%
  as.numeric()

# duration: "hh:mm:ss" following a two-word "T..."/"W..." label
hms_text <- content %>%
  str_extract("[TW]\\w+\\s\\w+\\s(:?\\d{2}){3}") %>%
  str_extract("(:?\\d{2}){3}")

# Convert column by column so the result keeps its h/m/s columns even when
# there is only a single e-mail.
dur <- tibble(value = hms_text) %>%
  separate(value, sep = ":", into = c("h", "m", "s")) %>%
  mutate(h = as.numeric(h),
         m = as.numeric(m),
         s = as.numeric(s))

dur$duration <- dur$h * 60 * 60 + dur$m * 60 + dur$s
gojek$duration <- duration(dur$duration)

# price
gojek$price <- content %>%
  str_extract("\\w+\\s\\(.+\\)\\sRp\\d+(\\.\\d+)+") %>%
  str_extract("Rp\\d+(\\.\\d+)*") %>%
  str_remove("Rp") %>%
  parse_amount()

# discount
gojek$discount <- content %>%
  str_extract("Diskon.+") %>%
  str_remove("Diskon.+Rp") %>%
  parse_amount()

# voucher
gojek$voucher <- content %>%
  str_extract("Voucher.+") %>%
  str_remove("Voucher.+Rp") %>%
  parse_amount()

# app service fee
gojek$fee <- content %>%
  str_extract("Biaya jasa aplikasi.+\\d{3}") %>%
  str_remove("^B.+Rp") %>%
  parse_amount()

# additional fee
gojek$additional <- content %>%
  str_extract("Pendapatan tambahan.+") %>%
  str_remove("^P.+Rp") %>%
  parse_amount()

# toll and parking
gojek$toll <- content %>%
  str_extract("Ongkos tol/parkir.+") %>%
  str_remove("^O.+Rp") %>%
  parse_amount()

# total payment
gojek$paid <- content %>%
  str_extract("TOTAL.+") %>%
  str_extract("Rp\\d+(\\.\\d+)*") %>%
  str_remove("Rp") %>%
  parse_amount()

# payment method; e-mails without a "GO-P..." line are treated as cash
gojek$payment <- content %>%
  str_extract("G[Oo]-?P.+") %>%
  str_remove("-") %>%
  str_to_title() %>%
  str_trim()

gojek <- gojek %>%
  mutate(payment = coalesce(payment, "Cash"))

# pickup: text between the "image: pickup" and "image: drop" markers,
# with the markers, brackets and the "Penjemputan"/"pick up" time label
# stripped
gojek$pickup <- content %>%
  str_squish() %>%
  str_extract("image: pickup.+image: drop") %>%
  str_remove_all("image:") %>%
  str_remove("pickup") %>%
  str_remove("drop") %>%
  str_remove("\\]") %>%
  str_remove("\\[") %>%
  str_remove(" ") %>%
  str_remove("Penjemputan\\s?\\*\\s?\\•\\s?\\d{2}:\\d{2}\\s?\\*") %>%
  str_remove("pick up\\s?\\*\\s?\\•\\s?\\d{2}:\\d{2}\\s?\\*") %>%
  str_trim()

# destination: text between "image: drop" and "image: Driver Image",
# cleaned the same way as the pickup
gojek$destination <- content %>%
  str_squish() %>%
  str_extract("image: drop.+image: Driver Image") %>%
  str_remove_all("[Ii]mage:?") %>%
  str_remove("drop") %>%
  str_remove("\\]") %>%
  str_remove("\\[") %>%
  str_remove("Driver") %>%
  str_remove(" ") %>%
  str_remove("Tujuan\\s?\\*\\s?\\•\\s?\\d{2}:\\d{2}\\s?\\*") %>%
  str_remove("destination\\s?\\*\\s?\\•\\s?\\d{2}:\\d{2}\\s?\\*") %>%
  str_trim()

# driver: the line that follows the "Driver Image]" marker
gojek$driver <- content %>%
  str_extract("Driver Image\\]\\s\n\n.+") %>%
  str_remove_all("Driver Image] \n\n") %>%
  str_remove("(Your driver)?(Driver Anda)?") %>%
  str_trim()

# Drop the 2nd to 4th input columns by position.
# NOTE(review): presumably the raw e-mail fields (including `body`);
# confirm against the column layout of travel.csv.
gojek <- gojek %>% select(-2:-4)

# add voucher to discount; gather fees; NA == 0
gojek <- gojek %>%
  mutate(discount = coalesce(discount, 0) + coalesce(voucher, 0),
         fee = coalesce(fee, 0) +
           coalesce(additional, 0) +
           coalesce(toll, 0)) %>%
  select(-voucher, -additional, -toll)

# check: price - discount + fee = paid
gojek$price - gojek$discount + gojek$fee == gojek$paid

# total consumption
# NOTE(review): columns are picked by position (3 to 8); verify that these
# are the numeric trip columns for the actual travel.csv layout.
colSums(gojek[, 3:8])

# save to RDS (create the output folder on a fresh checkout)
dir.create("output", showWarnings = FALSE, recursive = TRUE)
saveRDS(gojek, file = "output/gojek.rds")
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# Title: Clock
# Author: Andi Herlan
# Email: andi.herlan@protonmail.com
# Data Used: gojek.rds
# Packages Used: dplyr, lubridate, ggplot2
# Output File: clock.png
# Data Output: -
# Reference: https://www.wjakethompson.com/post/2018-11-27-ggclock/
#
# Draws every trip on a 12-hour polar "clock": the angle is the minute
# within the AM or PM half of the day, the ring (y = 0 or 1) tells AM from
# PM. The figure is written to figs/clock.png.
#
# The script no longer clears the workspace with rm(list = ls()); every
# object it needs is created below.

library(dplyr)
library(lubridate)
library(ggplot2)

# main data
gojek <- readRDS("output/gojek.rds")

# data for clocking
#   y   - 0 for AM trips, 1 for PM trips
#   x   - minutes since 12 o'clock within that half-day (0-719)
#   col - vehicle type, used to mark the car trips
clock <- gojek %>%
  transmute(datetime,
            y = as.numeric(pm(datetime)),
            x = (hour(datetime) %% 12L) * 60L + minute(datetime),
            col = vehicle)

# plot
clock_plot <- ggplot(data = clock) +
  # am / pm
  annotate(geom = "text", x = 0, y = 0.5, label = "AM",
           size = 4, colour = "gray40", alpha = 0.3) +
  annotate(geom = "text", x = 0, y = 1.5, label = "PM",
           size = 4, colour = "gray40", alpha = 0.3) +
  # one tick per trip (all vehicles)
  geom_segment(aes(x, y, xend = x + 1, yend = y + 0.8),
               colour = "#00AA13", alpha = 0.4) +
  # car trips get an extra dot
  geom_point(data = filter(clock, col == "Car"),
             aes(x, y), colour = "#000000", alpha = 0.8) +
  # coordinate
  coord_polar() +
  expand_limits(y = c(-1, 1)) +
  scale_x_continuous(limits = c(0, 720),
                     breaks = seq(180, 720, 180),
                     labels = c("3", "6", "9", "12")) +
  # text
  labs(title = "Travel with GOJEK", x = NULL, y = NULL,
       subtitle = paste("Andi's movement in",
                        min(year(gojek$datetime)), "-",
                        max(year(gojek$datetime)), "(• is GoCar)"),
       # NOTE(review): "AM" is hard-coded and the last row is assumed to be
       # the latest trip; confirm both when the data is refreshed.
       caption = paste("Last movement at", tail(gojek$datetime, 1),
                       "AM for vaccination\nGithub: akherlan | Data: GOJEK")) +
  # styling
  theme_minimal() +
  theme(
    text = element_text(family = "Sans"),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_blank(),
    panel.grid.major.x = element_line(size = 0.3),
    panel.grid.major.y = element_line(size = 0.3, linetype = 2),
    panel.grid.minor = element_blank(),
    plot.subtitle = element_text(colour = "gray40"),
    plot.caption = element_text(colour = "gray60"),
    plot.background = element_rect(fill = "white", size = 0)
  )

clock_plot

# save PNG (name the plot explicitly instead of relying on last_plot(),
# and make sure the target folder exists)
dir.create("figs", showWarnings = FALSE, recursive = TRUE)
ggsave("clock.png", plot = clock_plot, path = "figs", dpi = 150,
       units = "px", width = 2 * 540, height = 2 * 507)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
# Title: Distance
# Author: Andi Herlan
# Email: andi.herlan@protonmail.com
# Data Used: gojek.rds
# Packages Used: dplyr, mapboxapi, ggplot2, ggrepel
# Output File: distance.png
# Data Output: -
# Reference: https://ggplot2-book.org/annotations.html
#
# Scatter plot of trip distance against trip duration with a few annotated
# outliers (figs/distance.png), then the same plot with an inset map of the
# Jakarta-Surabaya route (figs/distance_map.png). Geocoding and routing go
# through the Mapbox API, so a Mapbox token and network access are needed.
#
# The script no longer clears the workspace with rm(list = ls()); every
# object it needs is created below.

library(dplyr)
library(mapboxapi)
library(ggplot2)
library(ggrepel)

# main data
gojek <- readRDS("output/gojek.rds")

# data for distance
distn <- gojek %>%
  select(vehicle, distance, duration)

# resettlement: the longest trip. Rows without a parsed distance are
# dropped first because arrange() sorts NA last, which would otherwise
# make tail(1) return an NA row.
comeback <- gojek %>%
  filter(!is.na(distance)) %>%
  arrange(distance) %>%
  tail(1)

# aladdin's carpet: the three shortest trips by duration
wrongride <- gojek %>%
  arrange(duration) %>%
  head(3)

# totals shown in the labels; missing values are skipped so one unparsed
# e-mail does not turn the whole label into "NA"
total_km <- sum(gojek$distance, na.rm = TRUE)
total_hours <- sum(as.numeric(gojek$duration), na.rm = TRUE) / 3600

# plot
p1 <- ggplot(data = distn) +
  # total distance
  annotate(geom = "text", x = 1.8, y = 28, size = 3.2, hjust = "left",
           label = paste0("Total distance:\n", total_km,
                          "km (", nrow(gojek), " trips)"),
           colour = "gray50") +
  # total duration
  annotate(geom = "text", x = 56, y = 2, size = 3.2, hjust = "right",
           label = paste0(round(total_hours, 2),
                          " hours\non the road"),
           colour = "gray50") +
  # resettlement
  annotate(geom = "point", x = comeback$duration / 60, alpha = 0.4,
           y = comeback$distance, size = 3.5, col = "orange") +
  annotate(geom = "curve", x = 62, y = 25, size = 0.3, col = "gray50",
           xend = comeback$duration / 60 - 0.2,
           yend = comeback$distance - 0.4,
           curvature = 0.3, arrow = arrow(length = unit(2, "mm"))) +
  annotate(geom = "text", label = "Resettled\nfrom Srengseng\nto Depok",
           x = 61.4, y = 25, hjust = "right", size = 3.5, col = "gray50") +
  # must be wrong
  annotate(geom = "curve", x = 10.4, y = 15.5, size = 0.3, col = "gray50",
           xend = wrongride$duration[1] / 60 + 0.2,
           yend = wrongride$distance[1] + 0.4,
           curvature = 0.3, arrow = arrow(length = unit(2, "mm"))) +
  annotate(geom = "text", label = "Aladdin's carpet\n(only ~3-6 second!)",
           hjust = "left", x = 11.4, y = 15.5, size = 3, col = "gray50") +
  # scatter plot
  geom_point(aes(duration / 60, distance, colour = vehicle), alpha = 0.4) +
  # scales
  scale_y_continuous(breaks = seq(0, 30, 5)) +
  scale_x_continuous(limits = c(0, 75),
                     breaks = c(seq(0, 15, 5), 30, 45, 60, 75)) +
  scale_colour_manual(values = c("#000000", "#00AA13")) +
  # main labels
  labs(title = "Travel with GOJEK",
       subtitle = "Equivalent to Jakarta-Surabaya (781 km) by car",
       caption = "Github: akherlan | Data: GOJEK",
       x = "Duration (minutes)", y = "Distance (km)",
       colour = "GO") +
  # styling
  theme_minimal() +
  theme(
    text = element_text(family = "Sans"),
    legend.position = c(0.85, 0.28),
    panel.grid.minor = element_blank(),
    plot.subtitle = element_text(colour = "gray40"),
    plot.caption = element_text(colour = "gray60"),
    axis.title = element_text(colour = "gray50"),
    axis.text = element_text(colour = "gray50"),
    plot.background = element_rect(fill = "white", size = 0)
  )

# save PNG (name the plot explicitly instead of relying on last_plot(),
# and make sure the target folder exists)
dir.create("figs", showWarnings = FALSE, recursive = TRUE)
ggsave("distance.png", plot = p1, path = "figs", dpi = 150, units = "px",
       width = 2 * 817, height = 2 * 516)

# add map data
java <- readRDS("output/java.rds")

# geocoding: one row per city
# NOTE(review): assumes mb_geocode() returns a length-2 c(longitude,
# latitude) vector per city (its default "coordinates" output); confirm
# for the installed mapboxapi version.
js <- tribble(~city, "Jakarta", "Surabaya")
coords <- do.call(rbind, lapply(js$city, mb_geocode))
jscoord <- tibble(long = coords[, 1], lat = coords[, 2])
js <- bind_cols(js, jscoord)

# longtrip Jakarta - Surabaya
jsline <- mb_directions(origin = "Jakarta", destination = "Surabaya")

p2 <- ggplot(java) +
  geom_sf(fill = "gray90", alpha = 0.3, colour = "gray80", size = 0.2) +
  geom_sf(data = jsline, colour = "black", size = 0.5, linetype = 2) +
  geom_point(data = js, aes(long, lat),
             colour = "red", size = 2, alpha = 0.6) +
  coord_sf() +
  theme_void()

# scatter plot with the route map as an inset
p_map <- p1 +
  annotation_custom(ggplotGrob(p2), xmin = 3, xmax = 45, ymin = 18, ymax = 27)

p_map

# save PNG
ggsave("distance_map.png", plot = p_map, path = "figs", dpi = 150,
       units = "px", width = 2 * 817, height = 2 * 516)
Oops, something went wrong.