Skip to content

Commit

Permalink
include data and add a map to 'distance' graph
Browse files Browse the repository at this point in the history
  • Loading branch information
akherlan committed Aug 27, 2021
1 parent 8f040b9 commit d78d4ee
Show file tree
Hide file tree
Showing 16 changed files with 8,591 additions and 413 deletions.
167 changes: 167 additions & 0 deletions 1-wrangling.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Title: Data Wrangling
# Author: Andi Herlan
# Email: andi.herlan@protonmail.com
# Data Used: travel.csv
# Packages Used: stringr, lubridate, dplyr, tidyr
# Output File: -
# Data Output: gojek.rds

library(dplyr)
library(tidyr)
library(stringr)
library(lubridate)


# invoice mail from GOJEK (raw export read from the working directory)
travel <- read.csv(file = "travel.csv")

# tibble copy that receives every parsed column below
gojek <- as_tibble(travel)

# mail bodies: the text all extractors below operate on
content <- travel$body

# datetime order == datetime email
# Parse the month/day/year timestamp text and pin it to Jakarta local time.
parsed_at <- as_datetime(gojek$datetime,
                         tz = "Asia/Jakarta",
                         format = "%m/%d/%Y %H:%M:%S")
gojek$datetime <- parsed_at

# ride or car
# Take the first "GO-xxx"/"Goxxx" token of each mail, strip the "go"/"go-"
# prefix and title-case the remainder (the clock script later compares the
# result with "Car").
# The class is [Oo]: inside brackets "|" is a literal character, so the former
# "[O|o]" also accepted "G|...".
gojek$vehicle <- content %>%
  str_extract("G[Oo]-?\\w{3,4}") %>%
  str_to_lower() %>%
  str_remove("go-?") %>%
  str_to_title()

# distance
# Grab the text from a word starting with "D" or "J" up to " km"
# (presumably the "Distance" / "Jarak" label; confirm against the mails),
# then keep the first number in it as kilometres.
# The class is [DJ]: the former "[D|J]" also matched a literal "|".
gojek$distance <- content %>%
  str_extract("[DJ]\\w+.+\\skm") %>%
  str_extract("\\d+(\\.\\d+)?") %>%
  as.numeric()

# duration
# Find a two-word label starting with "T" or "W" followed by an hh:mm:ss value
# (presumably "Travel time" / "Waktu tempuh"; confirm against the mails),
# split the clock value into its parts and convert to seconds.
# The class is [TW]: the former "[T|W]" also matched a literal "|".
dur <- content %>%
  str_extract("[TW]\\w+\\s\\w+\\s(:?\\d{2}){3}") %>%
  str_extract("(:?\\d{2}){3}") %>%
  as_tibble() %>%
  separate(value, sep = ":", into = c("h", "m", "s")) %>%
  # column-wise conversion stays a tibble for any number of rows; sapply()
  # collapses to a plain vector when there is a single mail
  mutate(across(everything(), as.numeric))

# total seconds, then stored as a lubridate Duration
dur$duration <- dur$h * 60 * 60 + dur$m * 60 + dur$s
gojek$duration <- duration(dur$duration)

# price
# Locate the "<word> (<...>) Rp<amount>" invoice line, isolate the amount and
# turn the dot-grouped rupiah text (e.g. "12.000") into a number.
# Every thousands separator is matched and removed, so amounts with more than
# one group (e.g. "1.250.000") are no longer truncated.
gojek$price <- content %>%
  str_extract("\\w+\\s\\(.+\\)\\sRp\\d+(\\.\\d+)+") %>%
  str_extract("Rp\\d+(\\.\\d+)*") %>%
  str_remove("Rp") %>%
  str_remove_all("\\.") %>%
  as.numeric()

# Extract one rupiah amount per mail body.
#   text:   character vector of mail bodies
#   line:   regex selecting the invoice text that carries the amount
#   prefix: regex for the part of that text up to and including "Rp"
# Returns a numeric vector; NA where the text is missing or is not a number.
# All "." thousands separators are removed, so "1.250.000" parses as 1250000
# instead of 1250.
extract_rupiah <- function(text, line, prefix) {
  text %>%
    str_extract(line) %>%
    str_remove(prefix) %>%
    str_remove_all("\\.") %>%
    as.numeric()
}

# discount
gojek$discount <- extract_rupiah(content, "Diskon.+", "Diskon.+Rp")

# voucher
gojek$voucher <- extract_rupiah(content, "Voucher.+", "Voucher.+Rp")

# app service fee
gojek$fee <- extract_rupiah(content, "Biaya jasa aplikasi.+\\d{3}", "^B.+Rp")

# additional fee
gojek$additional <- extract_rupiah(content, "Pendapatan tambahan.+", "^P.+Rp")

# toll and parking
gojek$toll <- extract_rupiah(content, "Ongkos tol/parkir.+", "^O.+Rp")

# total payment
# Take the "TOTAL..." line, isolate its first "Rp<amount>" and convert the
# dot-grouped amount to a number. All thousands separators are matched and
# removed so totals with more than one group are parsed in full.
gojek$paid <- content %>%
  str_extract("TOTAL.+") %>%
  str_extract("Rp\\d+(\\.\\d+)*") %>%
  str_remove("Rp") %>%
  str_remove_all("\\.") %>%
  as.numeric()

# payment method
# Take the text from the first "GO-P..."/"GoP..." token to the end of its
# line, drop the first hyphen, title-case and trim it.
# The class is [Oo]: the former "[O|o]" also matched a literal "|".
gojek$payment <- content %>%
  str_extract("G[Oo]-?P.+") %>%
  str_remove("-") %>%
  str_to_title() %>%
  str_trim()

# mails without such a token are recorded as cash payments
gojek <- gojek %>%
  mutate(payment = coalesce(payment, "Cash"))

# pickup
# Pickup address: the text between the "image: pickup" and "image: drop"
# markers of each mail, with the markers and the label/time stamp stripped.
# str_remove() drops only the first match, so the order of the steps matters.
gojek$pickup <- content %>%
# collapse all whitespace runs (including line breaks) to single spaces so the
# pattern below can span what were several lines
str_squish() %>%
str_extract("image: pickup.+image: drop") %>%
# strip the marker words and the first pair of square brackets
str_remove_all("image:") %>%
str_remove("pickup") %>%
str_remove("drop") %>%
str_remove("\\]") %>%
str_remove("\\[") %>%
# NOTE(review): removes only the first space of the string; confirm intent
str_remove(" ") %>%
# label + "*hh:mm*" stamp, Indonesian then English wording.
# NOTE(review): as written "\\\\s?" matches a literal backslash followed by an
# optional "s", not optional whitespace; confirm the mails contain a backslash
# here, otherwise these two patterns never match.
str_remove("Penjemputan\\s?\\*\\s?\\\\s?\\d{2}:\\d{2}\\s?\\*") %>%
str_remove("pick up\\s?\\*\\s?\\\\s?\\d{2}:\\d{2}\\s?\\*") %>%
str_trim()

# destination
# Destination address: the text between the "image: drop" and
# "image: Driver Image" markers of each mail, with the markers and the
# label/time stamp stripped.
# str_remove() drops only the first match, so the order of the steps matters.
gojek$destination <- content %>%
# collapse all whitespace runs (including line breaks) to single spaces so the
# pattern below can span what were several lines
str_squish() %>%
str_extract("image: drop.+image: Driver Image") %>%
# removes every "image"/"Image" (with optional colon), including the one in
# "Driver Image"
str_remove_all("[I|i]mage:?") %>%
str_remove("drop") %>%
str_remove("\\]") %>%
str_remove("\\[") %>%
str_remove("Driver") %>%
# NOTE(review): removes only the first space of the string; confirm intent
str_remove(" ") %>%
# label + "*hh:mm*" stamp, Indonesian then English wording.
# NOTE(review): as written "\\\\s?" matches a literal backslash followed by an
# optional "s", not optional whitespace; confirm the mails contain a backslash
# here, otherwise these two patterns never match.
str_remove("Tujuan\\s?\\*\\s?\\\\s?\\d{2}:\\d{2}\\s?\\*") %>%
str_remove("destination\\s?\\*\\s?\\\\s?\\d{2}:\\d{2}\\s?\\*") %>%
str_trim()

# driver
# The line that follows the "Driver Image]" marker and a blank line, minus the
# marker itself and a leading "Your driver" / "Driver Anda" label.
driver_line <- str_extract(content, "Driver Image\\]\\s\n\n.+")
driver_line <- str_remove_all(driver_line, "Driver Image] \n\n")
driver_line <- str_remove(driver_line, "(Your driver)?(Driver Anda)?")
gojek$driver <- str_trim(driver_line)

# drop the columns in positions 2 to 4
gojek <- select(gojek, -2:-4)

# add voucher to discount; gather fees; NA == 0
gojek <- gojek %>%
  mutate(
    # a missing amount means nothing was charged or deducted
    across(c(discount, voucher, fee, additional, toll), ~ coalesce(.x, 0)),
    discount = discount + voucher,
    fee = fee + additional + toll
  ) %>%
  select(-voucher, -additional, -toll)


# check: price - discount + fee = paid
# prints one TRUE/FALSE per trip
with(gojek, price - discount + fee == paid)

# total consumption
# column sums over positions 3 to 8
colSums(gojek[, 3:8])

# save to RDS
saveRDS(gojek, file = file.path("output", "gojek.rds"))

75 changes: 75 additions & 0 deletions 2-clock.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Title: Clock
# Author: Andi Herlan
# Email: andi.herlan@protonmail.com
# Data Used: gojek.rds
# Packages Used: dplyr, lubridate, ggplot2
# Output File: clock.png
# Data Output: -
# Reference: https://www.wjakethompson.com/post/2018-11-27-ggclock/


# The caller's workspace is deliberately left untouched: wiping it with
# rm(list = ls()) destroys unrelated objects when this file is sourced, and
# everything the script needs is loaded explicitly below.

library(dplyr)
library(lubridate)
library(ggplot2)

# main data
gojek <- readRDS("output/gojek.rds")

# data for clocking
# One row per trip, positioned on a 12-hour dial:
#   y   - 0 for AM trips, 1 for PM trips (inner / outer ring)
#   x   - minutes since the top of the dial (hour folded to 0-11, plus minutes)
#   col - vehicle type, used to mark car trips
# transmute() keeps the row-per-trip shape; summarise() is meant for
# aggregation, and returning many rows from it is deprecated in recent dplyr.
clock <- gojek %>%
  transmute(
    datetime = datetime,
    y = as.numeric(pm(datetime)),
    x = hour(datetime),
    x = ifelse(y == 1, x - 12L, x) * 60L,
    x = x + minute(datetime),
    col = vehicle
  )

# text shown under the title and at the bottom of the figure
clock_subtitle <- paste("Andi's movement in",
                        min(year(gojek$datetime)), "-",
                        max(year(gojek$datetime)), "(• is GoCar)")
clock_caption <- paste("Last movement at", tail(gojek$datetime, 1),
                       "AM for vaccination\nGithub: akherlan | Data: GOJEK")

# trips made by car get an extra black dot
car_trips <- filter(clock, col == "Car")

# styling applied on top of theme_minimal()
clock_theme <- theme(
  text = element_text(family = "Sans"),
  axis.title = element_blank(),
  axis.ticks = element_blank(),
  axis.text.x = element_text(size = 15),
  axis.text.y = element_blank(),
  panel.grid.major.x = element_line(size = 0.3),
  panel.grid.major.y = element_line(size = 0.3, linetype = 2),
  panel.grid.minor = element_blank(),
  plot.subtitle = element_text(colour = "gray40"),
  plot.caption = element_text(colour = "gray60"),
  plot.background = element_rect(fill = "white", size = 0)
)

# plot: a 12-hour dial with the AM trips on the inner ring and the PM trips
# on the outer ring
ggplot(data = clock) +
  # ring labels
  annotate(geom = "text", x = 0, y = 0.5, label = "AM",
           size = 4, colour = "gray40", alpha = 0.3) +
  annotate(geom = "text", x = 0, y = 1.5, label = "PM",
           size = 4, colour = "gray40", alpha = 0.3) +
  # one green tick per trip
  geom_segment(aes(x, y, xend = x + 1, yend = y + 0.8),
               colour = "#00AA13", alpha = 0.4) +
  # black dot on car trips
  geom_point(data = car_trips, aes(x, y),
             colour = "#000000", alpha = 0.8) +
  # polar coordinates; 720 minutes make one turn of the dial
  coord_polar() +
  expand_limits(y = c(-1, 1)) +
  scale_x_continuous(limits = c(0, 720),
                     breaks = seq(180, 720, 180),
                     labels = c("3", "6", "9", "12")) +
  # text
  labs(title = "Travel with GOJEK", x = NULL, y = NULL,
       subtitle = clock_subtitle,
       caption = clock_caption) +
  # styling
  theme_minimal() +
  clock_theme

# save PNG (writes the plot built above, i.e. the last plot)
ggsave("clock.png", path = "figs",
       width = 2 * 540, height = 2 * 507, units = "px", dpi = 150)
116 changes: 116 additions & 0 deletions 3-distance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Title: Distance
# Author: Andi Herlan
# Email: andi.herlan@protonmail.com
# Data Used: gojek.rds, java.rds
# Packages Used: dplyr, mapboxapi, ggplot2, ggrepel
# Output File: distance.png, distance_map.png
# Data Output: -
# Reference: https://ggplot2-book.org/annotations.html


# The caller's workspace is deliberately left untouched: wiping it with
# rm(list = ls()) destroys unrelated objects when this file is sourced, and
# everything the script needs is loaded explicitly below.

library(dplyr)
library(mapboxapi)
library(ggplot2)
library(ggrepel)

# main data
gojek <- readRDS("output/gojek.rds")

# data for distance
distn <- gojek %>%
  select(vehicle, distance, duration)

# resettlement: the single longest trip by distance
# Sorted descending and taken from the top: arrange() always places NA last,
# so ascending order + tail(1) would return a row with a missing distance
# whenever one exists.
comeback <- gojek %>%
  arrange(desc(distance)) %>%
  head(1)

# aladdin's carpet: the three trips with the shortest recorded duration
wrongride <- gojek %>%
  arrange(duration) %>%
  head(3)

# annotation texts
total_distance_label <- paste0("Total distance:\n", sum(gojek$distance),
                               "km (", nrow(gojek), " trips)")
total_duration_label <- paste0(round(sum(gojek$duration)/3600, 2),
                               " hours\non the road")

# arrow head shared by both call-out curves
tip <- arrow(length = unit(2, "mm"))

# styling applied on top of theme_minimal()
distance_theme <- theme(
  text = element_text(family = "Sans"),
  legend.position = c(0.85, 0.28),
  panel.grid.minor = element_blank(),
  plot.subtitle = element_text(colour = "gray40"),
  plot.caption = element_text(colour = "gray60"),
  axis.title = element_text(colour = "gray50"),
  axis.text = element_text(colour = "gray50"),
  plot.background = element_rect(fill = "white", size = 0)
)

# plot: duration (minutes) against distance (km), one point per trip
p1 <- ggplot(data = distn) +
  # total distance
  annotate(geom = "text", x = 1.8, y = 28, size = 3.2, hjust = "left",
           label = total_distance_label, colour = "gray50") +
  # total duration
  annotate(geom = "text", x = 56, y = 2, size = 3.2, hjust = "right",
           label = total_duration_label, colour = "gray50") +
  # resettlement: highlight the longest trip and point an arrow at it
  annotate(geom = "point", x = comeback$duration/60, y = comeback$distance,
           alpha = 0.4, size = 3.5, col = "orange") +
  annotate(geom = "curve", x = 62, y = 25, size = 0.3, col = "gray50",
           xend = comeback$duration/60-0.2, yend = comeback$distance-0.4,
           curvature = 0.3, arrow = tip) +
  annotate(geom = "text", label = "Resettled\nfrom Srengseng\nto Depok",
           x = 61.4, y = 25, hjust = "right", size = 3.5, col = "gray50") +
  # must be wrong: arrow to the shortest-duration trip
  annotate(geom = "curve", x = 10.4, y = 15.5, size = 0.3, col = "gray50",
           xend = wrongride$duration[1]/60+0.2,
           yend = wrongride$distance[1]+0.4,
           curvature = 0.3, arrow = tip) +
  annotate(geom = "text", label = "Aladdin's carpet\n(only ~3-6 second!)",
           hjust = "left", x = 11.4, y = 15.5, size = 3, col = "gray50") +
  # scatter plot
  geom_point(aes(duration/60, distance, colour = vehicle), alpha = 0.4) +
  # scales
  scale_y_continuous(breaks = seq(0, 30, 5)) +
  scale_x_continuous(limits = c(0, 75),
                     breaks = c(seq(0, 15, 5), 30, 45, 60, 75)) +
  scale_colour_manual(values = c("#000000", "#00AA13")) +
  # main labels
  labs(title = "Travel with GOJEK",
       subtitle = "Equivalent to Jakarta-Surabaya (781 km) by car",
       caption = "Github: akherlan | Data: GOJEK",
       x = "Duration (minutes)", y = "Distance (km)",
       colour = "GO") +
  # styling
  theme_minimal() +
  distance_theme

# save PNG
ggsave("distance.png", plot = p1, path = "figs",
       width = 2 * 817, height = 2 * 516, units = "px", dpi = 150)

# add map data
java <- readRDS(file.path("output", "java.rds"))

# geocoding: one mb_geocode() lookup per city, reshaped to one row per city
js <- tibble(city = c("Jakarta", "Surabaya"))
jscoord <- as.data.frame(lapply(js$city, mb_geocode))
jscoord <- setNames(as_tibble(t(jscoord)), c("long", "lat"))
js <- bind_cols(js, jscoord)

# longtrip Jakarta - Surabaya
jsline <- mb_directions(origin = "Jakarta", destination = "Surabaya")

# inset map: the java layer in light gray, the route as a dashed line and
# the two cities as red dots
p2 <- ggplot(data = java) +
  geom_sf(fill = "gray90", colour = "gray80", alpha = 0.3, size = 0.2) +
  geom_sf(data = jsline, colour = "black", linetype = 2, size = 0.5) +
  geom_point(data = js, mapping = aes(long, lat),
             colour = "red", alpha = 0.6, size = 2) +
  coord_sf() +
  theme_void()

# place the map inside the scatter plot, in the scatter plot's own axis units
p1 + annotation_custom(grob = ggplotGrob(p2),
                       xmin = 3, xmax = 45, ymin = 18, ymax = 27)

# save PNG (writes the combined plot built above, i.e. the last plot)
ggsave("distance_map.png", path = "figs",
       width = 2 * 817, height = 2 * 516, units = "px", dpi = 150)
Loading

0 comments on commit d78d4ee

Please sign in to comment.