Skip to content

Commit

Permalink
Merge pull request #11 from Roco-scientist/thread
Browse files Browse the repository at this point in the history
Thread
  • Loading branch information
Roco-scientist authored Jul 17, 2021
2 parents 8f051b0 + 534c0d1 commit 37ff5de
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 64 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ embedded-graphics = "0.6"
embedded-hal = "0.2"
clap = "2.33.0"
scraper = "0.12.0"
rayon = "1.5.1"
2 changes: 1 addition & 1 deletion mbta_station_info.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion mbta_vehicle_info.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"Subway":{"Green-D":"Green-D","Orange":"Orange","Green-B":"Green-B","Green":"Green","Green-E":"Green-E","Red":"Red","Mattapan_Trolley":"Mattapan","Blue":"Blue","Green-C":"Green-C"},"Commuter_Rail":{"Foxboro_Event_Service":"CR-Foxboro","Fitchburg":"CR-Fitchburg","Franklin":"CR-Franklin","Providence/Stoughton":"CR-Providence","Greenbush":"CR-Greenbush","Fairmount":"CR-Fairmount","Lowell":"CR-Lowell","Haverhill":"CR-Haverhill","Middleborough/Lakeville":"CR-Middleborough","Needham":"CR-Needham","Kingston":"CR-Kingston","Newburyport/Rockport":"CR-Newburyport","Framingham/Worcester":"CR-Worcester"},"Ferry":{"Hingham/Hull_Ferry":"Boat-F1","Charlestown_Ferry":"Boat-F4"}}
{"Subway":{"Mattapan_Trolley":"Mattapan","Blue":"Blue","Green-C":"Green-C","Green-D":"Green-D","Green-E":"Green-E","Orange":"Orange","Green-B":"Green-B","Red":"Red","Green":"Green"},"Ferry":{"Charlestown_Ferry":"Boat-F4","Hingham/Hull_Ferry":"Boat-F1"},"Commuter_Rail":{"Lowell":"CR-Lowell","Newburyport/Rockport":"CR-Newburyport","Providence/Stoughton":"CR-Providence","Fairmount":"CR-Fairmount","Franklin":"CR-Franklin","Haverhill":"CR-Haverhill","Middleborough/Lakeville":"CR-Middleborough","Needham":"CR-Needham","Fitchburg":"CR-Fitchburg","Foxboro_Event_Service":"CR-Foxboro","Kingston":"CR-Kingston","Framingham/Worcester":"CR-Worcester","Greenbush":"CR-Greenbush"}}
14 changes: 9 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use mbta_countdown;
// use rppal::gpio;
use std::{
sync::{Arc, Mutex},
thread, time,
thread, time,collections::HashMap,
};

fn main() {
Expand Down Expand Up @@ -66,16 +66,19 @@ pub fn arguments() -> Result<(String, String, u8, String), Box<dyn std::error::E
// get a list of stations to limit the station argument input
let mut input_stations: Vec<&str> = station_info.keys().map(|key| key.as_str()).collect();
input_stations.sort();
// create an empty hashmap to handle errors when the key does not exist and update is called
let mut empty_vehicle_hashmap = HashMap::new();
empty_vehicle_hashmap.insert("".to_string(), "".to_string());
// get a list of commuter rail lines to limit the commuter rail argument input
let commuter_rails = vehicle_info.get("Commuter_Rail").unwrap();
let commuter_rails = vehicle_info.get("Commuter_Rail").unwrap_or(&empty_vehicle_hashmap);
let mut input_commuter: Vec<&str> = commuter_rails.keys().map(|key| key.as_str()).collect();
input_commuter.sort();
// get a list of subway lines to limit the subway argument input
let subway_lines = vehicle_info.get("Subway").unwrap();
let subway_lines = vehicle_info.get("Subway").unwrap_or(&empty_vehicle_hashmap);
let mut input_subway: Vec<&str> = subway_lines.keys().map(|key| key.as_str()).collect();
input_subway.sort();
// get a list of ferry lines to limit the ferry argument input
let ferry_lines = vehicle_info.get("Ferry").unwrap();
let ferry_lines = vehicle_info.get("Ferry").unwrap_or(&empty_vehicle_hashmap);
let mut input_ferry: Vec<&str> = ferry_lines.keys().map(|key| key.as_str()).collect();
input_ferry.sort();

Expand Down Expand Up @@ -175,14 +178,15 @@ pub fn arguments() -> Result<(String, String, u8, String), Box<dyn std::error::E
if let Some(ferry) = args.value_of("ferry_line") {
vehicle_code = ferry_lines.get(ferry).unwrap().to_owned()
}
}
};

// Convert station to API code and check if the vehicle code exists at the station
let mut station = String::new();
if let Some(station_input) = args.value_of("station") {
let station_hashmap = station_info.get(station_input).unwrap();
station = station_hashmap.keys().last().unwrap().to_owned();
let stopping = station_hashmap.get(&station).unwrap().to_string();
let stopping = station_hashmap.get(&station).unwrap();
if !stopping.contains(&vehicle_code){
panic!("{} not at {}\nStopping at {}: {:?}", vehicle_code, station, station, stopping)
}
Expand Down
145 changes: 88 additions & 57 deletions src/mbta_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use scraper::{Html, Selector};
use std::{io::{BufWriter, BufReader},path::Path, fs::File, collections::HashMap};
use reqwest;
use serde_json;
use rayon::{ThreadPoolBuilder, prelude::*};
use std::sync::{Arc, Mutex};

/// Scrapes MBTA station and vehicle info from their website then stores the information in JSON files and returns in HashMaps
///
Expand All @@ -20,43 +22,56 @@ pub fn all_mbta_info(update: bool) -> Result<(HashMap<String, HashMap<String, St
let mbta_vehicle_file_loc = "mbta_vehicle_info.json";
let mbta_station_file_loc = "mbta_station_info.json";

// initiate vehicle hashmap for inner scope
let mut vehicle_info;
// if mbta vehicle JSON exists or update not called, read the JSON
if Path::new(&mbta_vehicle_file_loc).exists() && !update {
let g = File::open(&mbta_vehicle_file_loc)?;
let reader = BufReader::new(g);
vehicle_info = serde_json::from_reader(reader)?;
}else{
// otherwise scrape all data from the website and save into JSON files
let commuter_info = retrieve_commuter()?;
let subway_info = retrieve_subway()?;
let ferry_info = retrieve_ferry()?;
vehicle_info = HashMap::new();
vehicle_info.insert("Commuter_Rail".to_string(), commuter_info);
vehicle_info.insert("Subway".to_string(), subway_info);
vehicle_info.insert("Ferry".to_string(), ferry_info);
if !Path::new(&mbta_vehicle_file_loc).exists() | update {
println!("Updating vehicle information");

let vehicle_info_mutex = Arc::new(Mutex::new(HashMap::new()));
let vehicle_clone_outer = Arc::clone(&vehicle_info_mutex);
let pool = ThreadPoolBuilder::new().num_threads(3).build().expect("Threadpool failure");
pool.scope(move |s|{
let vehicle_clone_1 = Arc::clone(&vehicle_clone_outer);
s.spawn(move |_s|{
let commuter_info = retrieve_commuter().unwrap_or_else(|err| panic!("Error: {}", err));
let mut vehicle_info_unlocked = vehicle_clone_1.lock().unwrap();
vehicle_info_unlocked.insert("Commuter_Rail".to_string(), commuter_info);
});
let vehicle_clone_2 = Arc::clone(&vehicle_clone_outer);
s.spawn(move |_s|{
let subway_info = retrieve_subway().unwrap_or_else(|err| panic!("Error: {}", err));
let mut vehicle_info_unlocked = vehicle_clone_2.lock().unwrap();
vehicle_info_unlocked.insert("Subway".to_string(), subway_info);
});
let vehicle_clone_3 = Arc::clone(&vehicle_clone_outer);
s.spawn(move |_s|{
let ferry_info = retrieve_ferry().unwrap_or_else(|err| panic!("Error: {}", err));
let mut vehicle_info_unlocked = vehicle_clone_3.lock().unwrap();
vehicle_info_unlocked.insert("Ferry".to_string(), ferry_info);
});
});

let f = File::create(&mbta_vehicle_file_loc)?;
let bw = BufWriter::new(f);
let vehicle_info = Arc::try_unwrap(vehicle_info_mutex).unwrap().into_inner()?;
serde_json::to_writer(bw, &vehicle_info)?;
}
}else{println!("Using existing vehicle information")};
let g = File::open(&mbta_vehicle_file_loc)?;
let reader = BufReader::new(g);
let vehicle_info = serde_json::from_reader(reader)?;

// initiate station hashmap for inner scope
let station_info;
// if mbta station JSON exists or update not called, read the JSON
if Path::new(&mbta_station_file_loc).exists() && !update {
let g = File::open(&mbta_station_file_loc)?;
let reader = BufReader::new(g);
station_info = serde_json::from_reader(reader)?;
}else{
if !Path::new(&mbta_station_file_loc).exists() | update {
println!("Updating station information");
// otherwise scrape all data from the website
station_info = retrieve_stations()?;
let station_info_to_write = retrieve_stations()?;
let f = File::create(&mbta_station_file_loc)?;
let bw = BufWriter::new(f);
serde_json::to_writer(bw, &station_info)?;
}
serde_json::to_writer(bw, &station_info_to_write)?;
}else{println!("Using existing station information")};
let g = File::open(&mbta_station_file_loc)?;
let reader = BufReader::new(g);
let station_info = serde_json::from_reader(reader)?;
return Ok((vehicle_info, station_info))

}

/// Scrapes all station information from the MBTA websites and returns a HashMap of the information
Expand All @@ -67,15 +82,24 @@ fn retrieve_stations() -> Result<HashMap<String, HashMap<String, Vec<String>>>,
let ferry_url = "https://www.mbta.com/stops/ferry#ferry-tab";

// Parse the urls for the station information and add to the hashmap
let stations_info = parse_stations(subway_url)?;
let mut station_conversion: HashMap<String, HashMap<String, Vec<String>>> = stations_info.iter().cloned().collect();
station_conversion.extend(parse_stations(communter_url)?);
station_conversion.extend(parse_stations(ferry_url)?);
let mut station_conversion = HashMap::new();
station_conversion = update_station_hashmap(station_conversion, parse_stations(subway_url)?);
station_conversion = update_station_hashmap(station_conversion, parse_stations(communter_url)?);
station_conversion = update_station_hashmap(station_conversion, parse_stations(ferry_url)?);
return Ok(station_conversion)
}

/// Folds a batch of scraped station tuples into the station lookup table.
///
/// Every `(station, station_api, vehicles)` tuple becomes one entry keyed by
/// `station`, whose value is a single-entry map of `station_api -> vehicles`.
/// An entry already stored under the same station key is replaced; when the
/// batch itself repeats a station, the last tuple wins.
///
/// The table is taken by value and handed back so calls can be chained.
fn update_station_hashmap(mut station_conversion: HashMap<String, HashMap<String, Vec<String>>>, new_stations_info: Vec<(String, String, Vec<String>)>) -> HashMap<String, HashMap<String, Vec<String>>> {
    let fresh_entries = new_stations_info
        .into_iter()
        .map(|(name, api_code, stopping)| {
            // wrap the API code and its vehicles in their own one-entry map
            let per_station: HashMap<String, Vec<String>> =
                std::iter::once((api_code, stopping)).collect();
            (name, per_station)
        });
    station_conversion.extend(fresh_entries);
    station_conversion
}

/// Pulls the station information along with vehicles that stop at the station from the given URL
fn parse_stations(url: &str) -> Result<Vec<(String, HashMap<String, Vec<String>>)>, Box<dyn std::error::Error>> {
fn parse_stations(url: &str) -> Result<Vec<(String, String, Vec<String>)>, Box<dyn std::error::Error>> {
// get the website text
let website_text = reqwest::blocking::get(url)?.text()?;

Expand All @@ -85,31 +109,36 @@ fn parse_stations(url: &str) -> Result<Vec<(String, HashMap<String, Vec<String>>
let buttons = document.select(&button_selector);

// iterate on buttons and pull out the station information
let station_conversion: Vec<(String, HashMap<String, Vec<String>>)> = buttons
.map(|button| (
// get and rename the common understood station name
button
// Rayon threads cannot pass iterated buttons, so this is done beforehand
let station_api: Vec<(String, String)> = buttons.map(|button|{
let station_name = button
.value()
.attr("data-name")
.unwrap()
.replace(" ", "_")
.replace("'", "");
let station_api_name = button
.value()
.attr("data-name")
.attr("href")
.unwrap()
.replace(" ", "_")
.replace("'", ""),
// get the API station name and the vehicles that stop at the station
station_vehicles(
button
.value()
.attr("href")
.unwrap()
.replace("/stops/", "")
).unwrap()
)
)
.collect();
return Ok(station_conversion)
.replace("/stops/", "");
(station_name, station_api_name)
}).collect();

// create new vector to put parallel results into
let mut station_info_all = Vec::new();

// add station vehicles through rayon threads with par_iter
station_api
.par_iter()
.map(|(station_name, station_api_name)| (station_name.clone(), station_api_name.clone(), station_vehicles(&station_api_name).unwrap_or_else(|err| panic!("Station vehicle error: {}", err))))
.collect_into_vec(&mut station_info_all);

return Ok(station_info_all)
}

/// Finds all vehicles that stop at the station of interest
fn station_vehicles(station_code: String) -> Result<HashMap<String, Vec<String>>, Box<dyn std::error::Error>> {
fn station_vehicles(station_code: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
println!("Retrieving info for station: {}", station_code);
// get the website text for the station
let station_url = format!("https://www.mbta.com/stops/{}", station_code);
Expand All @@ -122,37 +151,38 @@ fn station_vehicles(station_code: String) -> Result<HashMap<String, Vec<String>>

// pull out vehicle codes from the buttons and place into a vec
let vehicles: Vec<String> = vehicle_buttons.map(|button| button.value().attr("href").unwrap().replace("/schedules/", "")).collect();

// create hashmap of station_code:[vehicle_codes]
let mut station_vehicles_hash = HashMap::new();
station_vehicles_hash.insert(station_code, vehicles);
return Ok(station_vehicles_hash)
return Ok(vehicles)
}

/// Builds the commuter rail lookup used to convert a commonly understood line name into its MBTA API code.
///
/// The MBTA commuter rail schedule page is scraped through `parse_schedule_website`;
/// any error from that call is propagated to the caller. Progress messages are
/// printed before and after the scrape.
fn retrieve_commuter() -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
    println!("Starting Commuter");
    // the commuter rail codes live inside the grid buttons of the schedule page
    let schedule_url = "https://www.mbta.com/schedules/commuter-rail";
    let button_selector = r#"a[class="c-grid-button c-grid-button--commuter-rail"]"#;
    let name_selector = r#"span[class="c-grid-button__name"]"#;
    let scraped_lines = parse_schedule_website(schedule_url, button_selector, name_selector)?;
    // element 0 of each scraped entry becomes the key and element 1 the value
    // (presumably [name, API code] -- confirm against parse_schedule_website)
    let mut name_to_code: HashMap<String, String> = HashMap::new();
    for line in scraped_lines.iter() {
        name_to_code.insert(line[0].clone(), line[1].clone());
    }
    println!("Finished Commuter");
    Ok(name_to_code)
}

/// Builds the ferry lookup used to convert a commonly understood ferry name into its MBTA API code.
///
/// The MBTA ferry schedule page is scraped through `parse_schedule_website`;
/// any error from that call is propagated to the caller. Progress messages are
/// printed before and after the scrape.
fn retrieve_ferry() -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
    println!("Starting Ferry");
    // the ferry codes live inside the grid buttons of the schedule page
    let schedule_url = "https://www.mbta.com/schedules/ferry";
    let button_selector = r#"a[class="c-grid-button c-grid-button--ferry"]"#;
    let name_selector = r#"span[class="c-grid-button__name"]"#;
    let scraped_routes = parse_schedule_website(schedule_url, button_selector, name_selector)?;
    // element 0 of each scraped entry becomes the key and element 1 the value
    // (presumably [name, API code] -- confirm against parse_schedule_website)
    let mut name_to_code: HashMap<String, String> = HashMap::new();
    for route in scraped_routes.iter() {
        name_to_code.insert(route[0].clone(), route[1].clone());
    }
    println!("Finished Ferry");
    Ok(name_to_code)
}

/// Retrieve subway conversion for MBTA API from common understandable name to MBTA API code.
fn retrieve_subway() -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
println!("Starting Subway");
// use the subway schedule website to get the conversion information from the buttons
let subway_url = "https://www.mbta.com/schedules/subway";
// buttons are setup slightly different than the commuter rail. Each colored line starts with the &str below but finishes with the color, so each needs to be determined for a scraper selector
Expand All @@ -168,6 +198,7 @@ fn retrieve_subway() -> Result<HashMap<String, String>, Box<dyn std::error::Erro
let green_lines_info = parse_schedule_website(subway_url, r#"a[class="c-grid-button__condensed"]"#, r#"svg[role="img"]"#)?;
// add green lines to the subway hashmap
subway_conversion.extend(green_lines_info.iter().map(|green| (green[1].clone(), green[1].clone())));
println!("Finished Subway");
return Ok(subway_conversion)
}

Expand Down

0 comments on commit 37ff5de

Please sign in to comment.