-
Notifications
You must be signed in to change notification settings - Fork 0
mahimavedi/Predict-fare
Folders and files
Name | Name | Last commit message | Last commit date | |
---|---|---|---|---|
Repository files navigation
# Predicting taxi fares in Manhattan from pickup location and time.
#
# The script reads a table of taxi journeys, keeps journeys that start in
# a lat/long box, fits regression trees and a random forest to the log of
# fare + tip, and draws the observed and predicted values on a map.

# Packages ----
# Attached up front, in the same order the analysis originally loaded them,
# so that function masking between packages is unchanged.
library(tidyverse)
library(ggmap)
library(viridis)
library(tree)
library(lubridate)
library(randomForest)

# Data ----
# Reading in the taxi data
taxi <- read_csv("datasets/taxi.csv")

# Taking a look at the first couple of rows in taxi
head(taxi)

# Cleaning: shorter coordinate names, drop journeys with neither a positive
# fare nor a positive tip, and model the log of the total amount paid.
# NOTE(review): the filter uses `|`, so a row with one positive and one
# negative amount is kept; if their sum is <= 0 the log is NaN/-Inf.
# Confirm the data contains no negative amounts.
taxi <- taxi %>%
  rename(lat = pickup_latitude, long = pickup_longitude) %>%
  filter(fare_amount > 0 | tip_amount > 0) %>%
  mutate(total = log(fare_amount + tip_amount))

# Reducing the data to journeys starting inside one lat/long box
taxi <- taxi %>%
  filter(
    between(lat, 40.70, 40.83),
    between(long, -74.025, -73.93)
  )

# Where do journeys start? ----
manhattan <- readRDS("datasets/manhattan.rds")

# Density map of the number of journey start locations
ggmap(manhattan, darken = 0.5) +
  scale_fill_viridis(option = "plasma") +
  geom_bin2d(
    data = taxi,
    aes(x = long, y = lat),
    bins = 60,
    alpha = 0.6
  ) +
  labs(x = "longitude", y = "latitude", fill = "journeys")

# Regression trees ----
# Fitting a tree to lat and long only
fitted_tree <- tree(total ~ lat + long, data = taxi)

# Draw a diagram of the tree structure
plot(fitted_tree)
text(fitted_tree)

# Generate the three new time variables from the pickup timestamp
taxi <- taxi %>%
  mutate(
    hour = hour(pickup_datetime),
    wday = wday(pickup_datetime, label = TRUE),
    month = month(pickup_datetime, label = TRUE)
  )
head(taxi)

# Refit the tree with the time variables added
fitted_tree <- tree(total ~ lat + long + hour + wday + month, data = taxi)
plot(fitted_tree)
text(fitted_tree)
summary(fitted_tree)

# Random forest ----
# Fitting a random forest: 80 trees, each grown on a sample of 10000 rows
fitted_forest <- randomForest(
  total ~ lat + long + hour + wday + month,
  data = taxi,
  ntree = 80,
  sampsize = 10000
)

# Store the forest's `predicted` component next to the observed values
taxi$pred_total <- fitted_forest$predicted

# Map of the mean predicted value per bin. The fill is the predicted
# log(fare + tip), not a journey count, so the legend says so.
ggmap(manhattan, darken = 0.5) +
  scale_fill_viridis(option = "plasma") +
  stat_summary_2d(
    data = taxi,
    aes(x = long, y = lat, z = pred_total),
    fun = mean,
    bins = 60,
    alpha = 0.6
  ) +
  labs(x = "longitude", y = "latitude", fill = "predicted log(fare + tip)")

# Mean of `x`, or NA when there are too few observations to trust it.
#
# x:     vector of values falling in one bin.
# min_n: minimum number of values required (default 15).
# Returns mean(x) when length(x) >= min_n, otherwise NA_real_.
mean_if_enough_data <- function(x, min_n = 15) {
  # Plain if/else: the condition is a single TRUE/FALSE, so the vectorised
  # ifelse() is unnecessary.
  if (length(x) >= min_n) {
    mean(x)
  } else {
    NA_real_
  }
}

# Map of the mean observed value per bin, blanking bins with sparse data.
# Each `+` ends its line: a line that *starts* with `+` would be parsed as
# a new statement and the layers would never be added to the map.
ggmap(manhattan, darken = 0.5) +
  scale_fill_viridis(option = "plasma") +
  stat_summary_2d(
    data = taxi,
    aes(x = long, y = lat, z = total),
    fun = mean_if_enough_data,
    bins = 60,
    alpha = 0.6
  ) +
  labs(x = "longitude", y = "latitude", fill = "fare")
About
No description, website, or topics provided.
Resources
Stars
Watchers
Forks
Releases
No releases published
Packages 0
No packages published