From 238de2ef8b22a345942e848e36e1a5629c14269a Mon Sep 17 00:00:00 2001 From: Devin Matte Date: Mon, 31 Jul 2023 19:03:14 -0400 Subject: [PATCH 1/3] Adding Green line train tracking to new trains --- ingestor/chalicelib/new_trains.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ingestor/chalicelib/new_trains.py b/ingestor/chalicelib/new_trains.py index 6c2c304..7aac6a2 100644 --- a/ingestor/chalicelib/new_trains.py +++ b/ingestor/chalicelib/new_trains.py @@ -5,6 +5,7 @@ ROUTE_DEFINITIONS = { "Red": {"labels": range(1900, 2152), "core_stations": [70077, 70078]}, # Downtown Crossing "Orange": {"labels": range(1400, 1552), "core_stations": [70014, 70015]}, # Back Bay + "Green": {"labels": range(3900, 3923), "core_stations": [70200, 70196]}, # Park Street } EVENT_DEPARTURE = ["DEP", "PRD"] From 16d34c285ca3fd1bdb0d4702a20cd1d06121fec8 Mon Sep 17 00:00:00 2001 From: Devin Matte Date: Tue, 10 Oct 2023 16:32:58 -0400 Subject: [PATCH 2/3] Adding green line, and recording unique cars each day --- ingestor/chalicelib/new_trains.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/ingestor/chalicelib/new_trains.py b/ingestor/chalicelib/new_trains.py index 7aac6a2..6431fd3 100644 --- a/ingestor/chalicelib/new_trains.py +++ b/ingestor/chalicelib/new_trains.py @@ -14,29 +14,47 @@ KEY = "NewTrains/run_counts/{}.csv" +# Handle dual green line cars, we don't need to care about both, just grab the first +def parse_vehicle_label(label): + if "-" in label: + return label.split("-")[0] + return label + + def train_runs(route, date): spec = ROUTE_DEFINITIONS[route] api_data = MbtaPerformanceAPI.get_api_data("events", {"stop": spec["core_stations"]}, date) events = sum([stop["events"] for stop in api_data], []) departures = filter(lambda event: event["event_type"] in EVENT_DEPARTURE, events) by_trip_id = {event["trip_id"]: event for event in departures} # Just in case a single trip gets a DEP and a PRD - return list(filter(lambda event: 
int(event["vehicle_label"]) in spec["labels"], by_trip_id.values())) + return list( + filter(lambda event: int(parse_vehicle_label(event["vehicle_label"])) in spec["labels"], by_trip_id.values()) + ) + + +def unique_trains(train_events): + # Using | as a delimiter so as not to create an undefined number of columns in a csv + return "|".join(list(set([event["vehicle_label"] for event in train_events]))) def update_all(date): for route in ROUTE_DEFINITIONS.keys(): print(f"Storing new train runs for {route}...") try: - run_count = len(train_runs(route, date)) - update_statistics_file(route, date, run_count) + train_events = train_runs(route, date) + run_count = len(train_events) + unique_train_runs = unique_trains(train_events) + update_statistics_file(route, date, run_count, unique_train_runs) except Exception: print(f"Unable to store new train run count for route={route}", file=sys.stderr) print(sys.exc_info()[2], file=sys.stderr) continue -def update_statistics_file(route, date, count): - csv_row = "{formatted_date},{count}\n".format(formatted_date=date.strftime("%Y-%m-%d"), count=count) +def update_statistics_file(route, date, count, unique_train_runs): + csv_row = "{formatted_date},{count},{unique_train_runs}\n".format( + formatted_date=date.strftime("%Y-%m-%d"), count=count, unique_train_runs=unique_train_runs + ) key = KEY.format(route) try: data = s3.download(BUCKET, key, compressed=False) + csv_row From 3e792037b22c11a9b7e25f04477cec61756f48fc Mon Sep 17 00:00:00 2001 From: Devin Matte Date: Tue, 10 Oct 2023 16:46:26 -0400 Subject: [PATCH 3/3] Handling green line pairs better --- ingestor/chalicelib/new_trains.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ingestor/chalicelib/new_trains.py b/ingestor/chalicelib/new_trains.py index 6431fd3..c3c1ca3 100644 --- a/ingestor/chalicelib/new_trains.py +++ b/ingestor/chalicelib/new_trains.py @@ -1,3 +1,4 @@ +from itertools import chain import sys from chalicelib import MbtaPerformanceAPI, s3 from 
botocore.exceptions import ClientError @@ -34,7 +35,8 @@ def train_runs(route, date): def unique_trains(train_events): # Using | as a delimiter so as not to create an undefined number of columns in a csv - return "|".join(list(set([event["vehicle_label"] for event in train_events]))) + train_list = list(set(list(chain.from_iterable([event["vehicle_label"].split("-") for event in train_events])))) + return "|".join(train_list) def update_all(date):