Skip to content

Commit

Permalink
1 more graph about needs
Browse files Browse the repository at this point in the history
  • Loading branch information
akherlan committed Aug 28, 2021
1 parent e100c1b commit 063f78b
Show file tree
Hide file tree
Showing 13 changed files with 237 additions and 9 deletions.
13 changes: 12 additions & 1 deletion 1-wrangling.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ gojek$vehicle <- content %>%
str_remove("go-?") %>%
str_to_title()

# order id
gojek$order_id <- content %>%
str_extract("RB-.+\\d+") %>%
str_squish()

# distance
gojek$distance <- content %>%
str_extract("[D|J]\\w+.+\\skm") %>%
Expand Down Expand Up @@ -162,11 +167,17 @@ gojek <- gojek %>%
gojek$price - gojek$discount + gojek$fee == gojek$paid

# total consumption
colSums(gojek[,c(3:8)])
colSums(gojek[,c(4:9)])

# sort by datetime
gojek <- gojek %>% arrange(datetime)

# check: double invoice
n_distinct(select(gojek, order_id)) == nrow(gojek)

# handle double record
# ???

# save to RDS
saveRDS(gojek, file = "output/gojek.rds")

1 change: 1 addition & 0 deletions 2-clock.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ ggplot(data = clock) +
panel.grid.minor = element_blank(),
plot.subtitle = element_text(colour = "gray40"),
plot.caption = element_text(colour = "gray60"),
plot.title.position = "plot",
plot.background = element_rect(fill = "white", size = 0)
)

Expand Down
3 changes: 2 additions & 1 deletion 3-distance.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Title: Distance
# Author: Andi Herlan
# Email: andi.herlan@protonmail.com
# Data Used: gojek.rds
# Data Used: gojek.rds, java.rds
# Packages Used: dplyr, mapboxapi, ggplot2, ggrepel
# Output File: distance.png
# Data Output: -
Expand Down Expand Up @@ -98,6 +98,7 @@ p1 <- ggplot(data = distn) +
text = element_text(family = "Sans"),
legend.position = c(0.85, 0.28),
panel.grid.minor = element_blank(),
plot.title.position = "plot",
plot.subtitle = element_text(colour = "gray40"),
plot.caption = element_text(colour = "gray60"),
axis.title = element_text(colour = "gray50"),
Expand Down
10 changes: 6 additions & 4 deletions 4-cost.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ ggplot() +
size = 3.4, colour = "#005400") +
# percent transaction done in app
geom_text(data = filter(cost, cat == 1 & payment == "Cash"),
aes(x = 0.5, y = y_trans + 170, label = paste0(n_trans$percent[2], "%")),
aes(x = 0.5, y = y_trans + 150, label = paste0(n_trans$percent[2], "%")),
size = 12, colour = "gray60") +
geom_text(data = filter(cost, cat == 1 & payment == "Cash"),
aes(x = 0.5, y = y_trans - 350,
aes(x = 0.5, y = y_trans - 365,
label = "of transactions\nwere completed\nwithin the app"),
size = 5, colour = "gray60") +
# graph's labels
Expand All @@ -122,13 +122,15 @@ ggplot() +
axis.title = element_text(colour = "gray50"),
plot.subtitle = element_text(colour = "gray40"),
plot.caption = element_text(colour = "gray60"),
plot.title.position = "plot",
strip.text = element_text(face = "bold", size = 11),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.spacing = unit(0, "cm"),
legend.position = "top",
legend.justification = c(0, 1),
legend.position = c(0.25, 0.85),
legend.direction = "horizontal",
legend.justification = "center",
plot.background = element_rect(fill = "white", size = 0)
)

Expand Down
201 changes: 201 additions & 0 deletions 5-needs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# Title: Needs
# Author: Andi Herlan
# Email: andi.herlan@protonmail.com
# Data Used: gojek.rds
# Packages Used: dplyr, tidyr, stringr, ggplot2, ggalluvial
# Output File: needs.png
# Data Output: od.rds
# Reference: https://corybrunson.github.io/ggalluvial


# clear environment
# NOTE(review): rm(list = ls()) deletes every (non-hidden) object in the
# workspace this script is run from. It does not detach packages or reset
# options, so it is not a substitute for a fresh R session; consider running
# the script in a clean session instead.
rm(list = ls())

library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
library(ggalluvial)

# main data
gojek <- readRDS("output/gojek.rds")

# origin destination
# Reshape to one row per trip endpoint: `pin` records whether the row is the
# trip's "origin" (pickup) or its "destination", and `location` is the address
# text before its first comma.
od <- gojek %>%
  select(datetime, origin = pickup, destination) %>%
  pivot_longer(cols = c(origin, destination),
               names_to = "pin",
               values_to = "address") %>%
  # NA in `into` discards everything after the first comma; `extra`/`fill`
  # keep separate() from warning about addresses with several commas or none.
  # The first piece is NA only when the address itself is NA, so no backfill
  # from `address` is needed.
  separate(address, into = c("location", NA), sep = ",",
           extra = "drop", fill = "right")

# better for string
# lower case with hyphens turned into spaces, so the lower-case patterns used
# for categorising below match regardless of capitalisation or hyphenation
od$location <- str_replace_all(tolower(od$location), "-", " ")

# location categories
# Substrings (lower case, hyphen free, like `od$location`) that identify each
# kind of place. They are matched literally, so the "." in e.g. "no.22" or
# "h. moat" only matches a real dot instead of acting as a regex wildcard.
landmarks <- list(
  station = c("stasiun", "station"),
  bus = c("halte", "kelapa dua sasak rt.2/rw.2"),
  office = c(
    "tempo",
    "palmerah barat no.22",
    "kementerian pekerjaan umum",
    "ministry of public works",
    "pattimura no.20",
    "raden patah",
    "al azhar"
  ),
  home = c(
    "jalan taman indah",                   # h. ranto
    "al makmur",                           # bu lastri
    "alfamidi raya rtm",                   # h. ranto
    "tugu kp areman rt 04/ rw 05 no. 38",
    "sasak i no.39",                       # bu mamay
    "srengseng raya no.45",                # h. nasir
    "lap. tenis",                          # h. nasir
    "rumbut no.4",                         # h. ranto
    "h. moat",                             # bu lastri
    "wisma srikandi",                      # bu lastri
    "rtm lampu merah",                     # bu lastri
    "bu lastri",                           # bu lastri
    "srengseng raya no.7b"                 # h. nasir
  )
)

# Flag the locations that contain at least one of `patterns`.
# Returns TRUE for a match and NA otherwise (never FALSE), which is the
# encoding the later reshaping relies on; an NA location stays NA.
flag_location <- function(location, patterns) {
  hits <- lapply(patterns, function(p) str_detect(location, fixed(p)))
  ifelse(Reduce(`|`, hits), TRUE, NA)
}

od <- od %>%
  mutate(
    station = flag_location(location, landmarks[["station"]]),
    bus = flag_location(location, landmarks[["bus"]]),
    office = flag_location(location, landmarks[["office"]]),
    home = flag_location(location, landmarks[["home"]]),
    # other places: everything that matched none of the categories above
    place = ifelse(
      is.na(station) & is.na(bus) & is.na(office) & is.na(home),
      TRUE,
      NA
    )
  )

# empty category
# Sanity check: number of rows minus the number of category flags that are
# set. `place` is set exactly when the other four flags are all NA, so every
# row carries at least one flag: 0 means exactly one category per row, a
# negative value means some location matched more than one category.
# Columns are picked by name so the check survives added/reordered columns.
category_cols <- c("station", "bus", "office", "home", "place")
nrow(od) - sum(!is.na(od[, category_cols]))

# odplot <- od %>%
# select(-datetime) %>%
# mutate(count = 1) %>%
# group_by(origin, destination, payment) %>%
# summarise(freq = sum(count), .groups = "drop")

# enrichment
# Collapse the five flag columns into a single category label per endpoint
# (flags are TRUE or NA, so dropping NA keeps exactly the set flags), then
# spread back so each datetime has an `origin` and a `destination` column.
od <- od %>%
  select(-location) %>%
  pivot_longer(
    cols = c("station", "bus", "office", "home", "place"),
    names_to = "category",
    values_to = "flag",
    values_drop_na = TRUE
  ) %>%
  select(-flag) %>%
  pivot_wider(
    id_cols = "datetime",
    names_from = "pin",
    values_from = "category"
  )

# save od data
saveRDS(od, file = "output/od.rds")

# add payment
od <- left_join(select(gojek, datetime, payment), od, by = "datetime")

# data for needs
# Translate each destination category into the need it served, merge the two
# transit origins (station, bus) into "stop", and count the trips for every
# origin / need / payment combination.
odp <- od %>%
  select(-datetime) %>%
  mutate(
    destination = case_when(
      is.na(destination) ~ NA_character_,  # keep missing values missing
      destination == "home" ~ "Go Home",
      destination %in% c("bus", "station") ~ "Transit",
      destination == "office" ~ "Work",
      TRUE ~ "Business"
    ),
    origin = str_to_title(
      ifelse(origin %in% c("station", "bus"), "stop", origin)
    )
  ) %>%
  group_by(origin, destination, payment) %>%
  summarise(freq = as.numeric(n()), .groups = "drop") %>%
  # fixed level order controls how the strata are stacked in the plot
  mutate(
    origin = factor(
      origin,
      levels = c("Home", "Office", "Place", "Stop")
    ),
    destination = factor(
      destination,
      levels = c("Go Home", "Work", "Business", "Transit")
    )
  )

# plot
# Alluvial diagram: the left axis is where a trip started, the right axis is
# the need it served; band thickness is the number of trips (`freq`) and band
# colour is the payment method.

# Commentary printed to the right of the diagram, one phrase per line.
needs_note <- paste(
  "The most frequent trip",
  "was from stop (a station",
  "or other transit node)",
  "to come back home.",
  " ",
  "The second was a trip",
  "for go home after doing",
  "some activities.",
  " ",
  "There was a strong relation",
  "between office and transit.",
  " ",
  "If I had to pay by cash,",
  "maybe GoPay balance was 0",
  "when doing activity outside.",
  sep = "\n"
)

ggplot(data = odp,
       aes(axis1 = origin, axis2 = destination, y = freq)) +
  # extra expansion on the right leaves room for the commentary and legend
  scale_x_discrete(limits = c("Origin", "Needs"),
                   expand = c(0, 0.2, 0, 0.7)) +
  geom_alluvium(aes(fill = payment)) +
  geom_stratum() +
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  # NOTE(review): colours and labels are assigned by position to the payment
  # values -- confirm they come out in the order Cash, GoPay.
  scale_fill_manual(values = c("orange", "#00BBE0"),
                    labels = c("Cash", "GoPay")) +
  annotate(geom = "text", x = 2.25, y = 190,
           label = needs_note,
           hjust = "left", vjust = "top",
           size = 3.2, colour = "gray50") +
  labs(
    title = "Effort with GOJEK",
    subtitle = "I even want to come home from home #SelaluAdaJalan #DiRumahAja",
    caption = "Github: akherlan | Data: GOJEK",
    fill = "Payment"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = "Sans"),
    axis.title = element_blank(),
    axis.text.x = element_text(face = "bold", size = 10),
    axis.text.y = element_blank(),
    panel.grid = element_blank(),
    legend.position = c(0.75, 0.85),
    legend.direction = "vertical",
    legend.justification = "left",
    # panel.border = element_rect(colour = "black", size = 1, fill = "transparent", linetype = 2),
    plot.title.position = "plot",
    plot.subtitle = element_text(colour = "gray40"),
    plot.caption = element_text(colour = "gray60"),
    plot.background = element_rect(fill = "white", size = 0)
  )

# save PNG
# No `plot` argument is given, so ggsave() writes the most recent plot.
# Size is given in pixels: twice 760 x 462.
ggsave(
  filename = "needs.png",
  path = "figs",
  width = 2 * 760,
  height = 2 * 462,
  units = "px",
  dpi = 150
)

18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## GOJEK
## Travel with Gojek

**Mengolah data invoice dari Gojek**

Expand All @@ -14,10 +14,22 @@ Saya tidak terlalu rutin menggunakan moda transportasi ini. Namun totalnya sudah

![movement with gojek](figs/distance_map.png)

Dengan GOJEK saya sudah melaju seolah-olah mulai dari ujung Barat pulau Jawa hingga ke ujung Timurnya.
Dengan Gojek saya sudah melaju seolah-olah mulai dari ujung Barat pulau Jawa hingga ke ujung Timurnya.

### Costs
### Cost

![expenses with gojek](figs/cost.png)

Biaya ini belum termasuk dengan penggunaan GoBills untuk BPJS, pulsa, dll. apalagi dengan GoFood. Belum.

### Needs

![effort with gojek](figs/needs.png)

Perjalanan terbanyak ketika ng-Gojek adalah dari stasiun (atau halte) untuk pulang, disusul oleh perjalanan pulang selepas beraktivitas di luar selain kerja.

Ada hubungan yang erat antara kantor dengan aktivitas transit.

Jika saya harus menggunakan uang tunai kemungkinan karena saya kehabisan GoPay saat berada di luar rumah.

Pertanyaan berikutnya adalah kenapa ada perjalanan dari rumah ke rumah? Punya rumah lebih dari satu! Hahahaha... Saya pindah-pindah kost sebetulnya.
Binary file modified figs/clock.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/cost.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/distance.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/distance_map.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figs/needs.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified output/gojek.rds
Binary file not shown.
Binary file added output/od.rds
Binary file not shown.

0 comments on commit 063f78b

Please sign in to comment.