From b298db278f3c9bc1610be744bdc7ab9f534b5770 Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Thu, 13 Jun 2024 23:32:30 +0000 Subject: [PATCH 1/3] gtfs portfolio testing --- portfolio/gtfs_digest_testing/_config.yml | 2 +- ...lameda-contra-costa-transit-district.ipynb | 3 +++ ...itol-corridor-joint-powers-authority.ipynb | 3 +++ ...ntral-contra-costa-transit-authority.ipynb | 3 +++ ...ame_city-and-county-of-san-francisco.ipynb | 3 +++ ..._organization_name_city-of-fairfield.ipynb | 3 +++ ...organization_name_city-of-menlo-park.ipynb | 3 +++ ...__organization_name_city-of-petaluma.ipynb | 3 +++ ..._organization_name_city-of-rio-vista.ipynb | 3 +++ ...organization_name_city-of-santa-rosa.ipynb | 3 +++ ...ion_name_city-of-south-san-francisco.ipynb | 3 +++ ...ame_city-and-county-of-san-francisco.ipynb | 3 --- ...organization_name_city-of-union-city.ipynb | 3 +++ ..._organization_name_city-of-vacaville.ipynb | 3 +++ ...n_name_marin-county-transit-district.ipynb | 3 +++ ...napa-valley-transportation-authority.ipynb | 3 +++ ...eninsula-corridor-joint-powers-board.ipynb | 3 +++ ...nd__organization_name_presidio-trust.ipynb | 3 +++ ...isco-bay-area-rapid-transit-district.ipynb | 3 +++ ...isco-bay-area-rapid-transit-district.ipynb | 3 --- ...lameda-contra-costa-transit-district.ipynb | 3 --- portfolio/sites/gtfs_digest_testing.yml | 19 ++++++++++++++++--- 22 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/00__03_report__district_04-oakland__organization_name_alameda-contra-costa-transit-district.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/01__03_report__district_04-oakland__organization_name_capitol-corridor-joint-powers-authority.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/02__03_report__district_04-oakland__organization_name_central-contra-costa-transit-authority.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/03__03_report__district_04-oakland__organization_name_city-and-county-of-san-francisco.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/04__03_report__district_04-oakland__organization_name_city-of-fairfield.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/05__03_report__district_04-oakland__organization_name_city-of-menlo-park.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/06__03_report__district_04-oakland__organization_name_city-of-petaluma.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/07__03_report__district_04-oakland__organization_name_city-of-rio-vista.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/08__03_report__district_04-oakland__organization_name_city-of-santa-rosa.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/09__03_report__district_04-oakland__organization_name_city-of-south-san-francisco.ipynb delete mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/0__03_report__district_04-oakland__organization_name_city-and-county-of-san-francisco.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/10__03_report__district_04-oakland__organization_name_city-of-union-city.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/11__03_report__district_04-oakland__organization_name_city-of-vacaville.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/12__03_report__district_04-oakland__organization_name_marin-county-transit-district.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/13__03_report__district_04-oakland__organization_name_napa-valley-transportation-authority.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/14__03_report__district_04-oakland__organization_name_peninsula-corridor-joint-powers-board.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/15__03_report__district_04-oakland__organization_name_presidio-trust.ipynb create mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/16__03_report__district_04-oakland__organization_name_san-francisco-bay-area-rapid-transit-district.ipynb delete mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/1__03_report__district_04-oakland__organization_name_san-francisco-bay-area-rapid-transit-district.ipynb delete mode 100644 portfolio/gtfs_digest_testing/district_04-oakland/2__03_report__district_04-oakland__organization_name_alameda-contra-costa-transit-district.ipynb diff --git a/portfolio/gtfs_digest_testing/_config.yml b/portfolio/gtfs_digest_testing/_config.yml index 7225f238b..27b6d609f 100644 --- a/portfolio/gtfs_digest_testing/_config.yml +++ b/portfolio/gtfs_digest_testing/_config.yml @@ -1,7 +1,7 @@ # Book settings # Learn more at https://jupyterbook.org/customize/config.html -title: GTFS Digest +title: GTFS Digest TEST author: Cal-ITP copyright: "2024" #logo: calitp_logo_MAIN.png diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/00__03_report__district_04-oakland__organization_name_alameda-contra-costa-transit-district.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/00__03_report__district_04-oakland__organization_name_alameda-contra-costa-transit-district.ipynb new file mode 100644 index 000000000..cb5aa0db5 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/00__03_report__district_04-oakland__organization_name_alameda-contra-costa-transit-district.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb84992b08b3255253f2b21abdfe29f6534c3814efac3f6ceb6efddd9a872d1 +size 44221849 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/01__03_report__district_04-oakland__organization_name_capitol-corridor-joint-powers-authority.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/01__03_report__district_04-oakland__organization_name_capitol-corridor-joint-powers-authority.ipynb new file mode 100644 index 000000000..14d120e68 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/01__03_report__district_04-oakland__organization_name_capitol-corridor-joint-powers-authority.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36471c2d67c179636199d6ec796123470a8c78bcab5c4a798f6827712ccf7c65 +size 137164 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/02__03_report__district_04-oakland__organization_name_central-contra-costa-transit-authority.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/02__03_report__district_04-oakland__organization_name_central-contra-costa-transit-authority.ipynb new file mode 100644 index 000000000..9faaf71fc --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/02__03_report__district_04-oakland__organization_name_central-contra-costa-transit-authority.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5728622c0a6ab8e5d4c6c18a4eea9d9ad218fbf3b12e529bd2b1ca948a15f1e3 +size 19181468 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/03__03_report__district_04-oakland__organization_name_city-and-county-of-san-francisco.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/03__03_report__district_04-oakland__organization_name_city-and-county-of-san-francisco.ipynb new file mode 100644 index 000000000..7c9dfe820 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/03__03_report__district_04-oakland__organization_name_city-and-county-of-san-francisco.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e6598d5c3bab8bc2289b551f5c29b52766f528b9d6f815bfe479aa8509b7ec +size 25870004 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/04__03_report__district_04-oakland__organization_name_city-of-fairfield.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/04__03_report__district_04-oakland__organization_name_city-of-fairfield.ipynb new file mode 100644 index 000000000..37d3545e9 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/04__03_report__district_04-oakland__organization_name_city-of-fairfield.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d657db23c23866af5e3a8defe7d079a57951666ad74fe4ff10b7186258ba9d9 +size 2869152 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/05__03_report__district_04-oakland__organization_name_city-of-menlo-park.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/05__03_report__district_04-oakland__organization_name_city-of-menlo-park.ipynb new file mode 100644 index 000000000..53749b837 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/05__03_report__district_04-oakland__organization_name_city-of-menlo-park.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690f39f2fc2eda7161f2fea66a7a6522b5342299a8b0a5a34d9df7787e13b1d9 +size 520844 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/06__03_report__district_04-oakland__organization_name_city-of-petaluma.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/06__03_report__district_04-oakland__organization_name_city-of-petaluma.ipynb new file mode 100644 index 000000000..17ce094fa --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/06__03_report__district_04-oakland__organization_name_city-of-petaluma.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe21c721876580cd091174c3a171a520615585a05285877ce5de487bc4679127 +size 2313788 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/07__03_report__district_04-oakland__organization_name_city-of-rio-vista.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/07__03_report__district_04-oakland__organization_name_city-of-rio-vista.ipynb new file mode 100644 index 000000000..95ce86883 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/07__03_report__district_04-oakland__organization_name_city-of-rio-vista.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f4e5c92262c7ac28bd65654b2b88e8f4fe0cbd5518c212b2a02e42ec368ecb +size 107689 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/08__03_report__district_04-oakland__organization_name_city-of-santa-rosa.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/08__03_report__district_04-oakland__organization_name_city-of-santa-rosa.ipynb new file mode 100644 index 000000000..ac79ffa01 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/08__03_report__district_04-oakland__organization_name_city-of-santa-rosa.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f565f8f2e6e03176f79c0b3f19315bd0dd929c65e9da31a1bce962fed25a7da +size 3912836 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/09__03_report__district_04-oakland__organization_name_city-of-south-san-francisco.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/09__03_report__district_04-oakland__organization_name_city-of-south-san-francisco.ipynb new file mode 100644 index 000000000..2232f4761 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/09__03_report__district_04-oakland__organization_name_city-of-south-san-francisco.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca4a89ab2f5da19d3492d2d97e2d104da6fe1665ea0d2440987742c21c665de8 +size 120248 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/0__03_report__district_04-oakland__organization_name_city-and-county-of-san-francisco.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/0__03_report__district_04-oakland__organization_name_city-and-county-of-san-francisco.ipynb deleted file mode 100644 index 1ebd2d7a4..000000000 --- a/portfolio/gtfs_digest_testing/district_04-oakland/0__03_report__district_04-oakland__organization_name_city-and-county-of-san-francisco.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:acef2217bdd29e71ba11a621b9bfdb0ee227ee1295c2a247d99b6bfaaddcd197 -size 26493289 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/10__03_report__district_04-oakland__organization_name_city-of-union-city.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/10__03_report__district_04-oakland__organization_name_city-of-union-city.ipynb new file mode 100644 index 000000000..ec80a520d --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/10__03_report__district_04-oakland__organization_name_city-of-union-city.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f3cddae62382e9b2f88f077e427c836091c918195bcdfe0f9c0ec50ed36e60 +size 2128835 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/11__03_report__district_04-oakland__organization_name_city-of-vacaville.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/11__03_report__district_04-oakland__organization_name_city-of-vacaville.ipynb new file mode 100644 index 000000000..6e5a82963 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/11__03_report__district_04-oakland__organization_name_city-of-vacaville.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adacbd767c74db0c9b89912723ff96a6c04a1e130f2fc51fb9a8f38591b623be +size 201993 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/12__03_report__district_04-oakland__organization_name_marin-county-transit-district.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/12__03_report__district_04-oakland__organization_name_marin-county-transit-district.ipynb new file mode 100644 index 000000000..ad6b8eb85 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/12__03_report__district_04-oakland__organization_name_marin-county-transit-district.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2822a5b162939895ca1ee4852925b5641de5781d489dfb9ce48842e2a0e0dd +size 8025699 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/13__03_report__district_04-oakland__organization_name_napa-valley-transportation-authority.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/13__03_report__district_04-oakland__organization_name_napa-valley-transportation-authority.ipynb new file mode 100644 index 000000000..5c9bbe8e1 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/13__03_report__district_04-oakland__organization_name_napa-valley-transportation-authority.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f3621037fcada4bee2abe695fe5aef93813c19d9480bff4f132848b04eaedd2 +size 2851692 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/14__03_report__district_04-oakland__organization_name_peninsula-corridor-joint-powers-board.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/14__03_report__district_04-oakland__organization_name_peninsula-corridor-joint-powers-board.ipynb new file mode 100644 index 000000000..1180d8e70 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/14__03_report__district_04-oakland__organization_name_peninsula-corridor-joint-powers-board.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d2b6583636f49435b4502ef2c53db0a322c4ab8d51f932c25d93e9a95812fa +size 1988691 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/15__03_report__district_04-oakland__organization_name_presidio-trust.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/15__03_report__district_04-oakland__organization_name_presidio-trust.ipynb new file mode 100644 index 000000000..4f6a025f8 --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/15__03_report__district_04-oakland__organization_name_presidio-trust.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:023b18227191b8431a30e080c0d5d8c8d1f450660d87cb63eeffce0399bb46a1 +size 648549 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/16__03_report__district_04-oakland__organization_name_san-francisco-bay-area-rapid-transit-district.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/16__03_report__district_04-oakland__organization_name_san-francisco-bay-area-rapid-transit-district.ipynb new file mode 100644 index 000000000..96cc0891c --- /dev/null +++ b/portfolio/gtfs_digest_testing/district_04-oakland/16__03_report__district_04-oakland__organization_name_san-francisco-bay-area-rapid-transit-district.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72766d2f5a64a587c2431c918d37f2dc3d1ef543017a4fa73061044caea8175c +size 771406 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/1__03_report__district_04-oakland__organization_name_san-francisco-bay-area-rapid-transit-district.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/1__03_report__district_04-oakland__organization_name_san-francisco-bay-area-rapid-transit-district.ipynb deleted file mode 100644 index d991bf97b..000000000 --- a/portfolio/gtfs_digest_testing/district_04-oakland/1__03_report__district_04-oakland__organization_name_san-francisco-bay-area-rapid-transit-district.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2286ce58f0c26d097b5eb8764a77d8dce88e3f3aff60e9ccce12b9939e6148fb -size 771432 diff --git a/portfolio/gtfs_digest_testing/district_04-oakland/2__03_report__district_04-oakland__organization_name_alameda-contra-costa-transit-district.ipynb b/portfolio/gtfs_digest_testing/district_04-oakland/2__03_report__district_04-oakland__organization_name_alameda-contra-costa-transit-district.ipynb deleted file mode 100644 index 91f58b2d3..000000000 --- a/portfolio/gtfs_digest_testing/district_04-oakland/2__03_report__district_04-oakland__organization_name_alameda-contra-costa-transit-district.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4eac0c4daa44556f6464cd68d7a4957881a9e70379235a5084e4b4cb1f8e81bd -size 45115657 diff --git a/portfolio/sites/gtfs_digest_testing.yml b/portfolio/sites/gtfs_digest_testing.yml index 783e47cb9..2c0108e4b 100644 --- a/portfolio/sites/gtfs_digest_testing.yml +++ b/portfolio/sites/gtfs_digest_testing.yml @@ -6,9 +6,22 @@ parts: params: district: 04 - Oakland sections: + - organization_name: Alameda-Contra Costa Transit District + - organization_name: Capitol Corridor Joint Powers Authority + - organization_name: Central Contra Costa Transit Authority - organization_name: City and County of San Francisco + - organization_name: City of Fairfield + - organization_name: City of Menlo Park + - organization_name: City of Petaluma + - organization_name: City of Rio Vista + - organization_name: City of Santa Rosa + - organization_name: City of South San Francisco + - organization_name: City of Union City + - organization_name: City of Vacaville + - organization_name: Marin County Transit District + - organization_name: Napa Valley Transportation Authority + - organization_name: Peninsula Corridor Joint Powers Board + - organization_name: Presidio Trust - organization_name: San Francisco Bay Area Rapid Transit District - - organization_name: Alameda-Contra Costa Transit District - readme: ./gtfs_digest/README.md -title: GTFS Digest +title: GTFS Digest TEST From 047add8f93794ebf7f9cfd57187f1f5fbca760ae Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Thu, 13 Jun 2024 23:39:03 +0000 Subject: [PATCH 2/3] removed old cardinal dir stuff --- gtfs_digest/_section2_utils.py | 219 --------------------------------- 1 file changed, 219 deletions(-) diff --git a/gtfs_digest/_section2_utils.py b/gtfs_digest/_section2_utils.py index 69373fe64..358dffe32 100644 --- a/gtfs_digest/_section2_utils.py +++ b/gtfs_digest/_section2_utils.py @@ -36,225 +36,6 @@ Schedule_vp_metrics Functions """ -def load_most_current_date() -> str: - # from shared_utils import rt_utils - dates_dictionary = rt_dates.DATES - date_list = list(dates_dictionary.items()) - # Grab the last key-value pair - last_key, last_value = date_list[-1] - return last_value - -def load_scheduled_stop_times(date: str, gtfs_schedule_key: list) -> pd.DataFrame: - stop_times_col = [ - "feed_key", - "stop_id", - "stop_sequence", - "schedule_gtfs_dataset_key", - "trip_instance_key", - "shape_array_key", - "stop_name", - "prior_stop_sequence", - "subseq_stop_sequence", - "stop_pair", - "stop_pair_name", - "stop_primary_direction", - "stop_meters", - ] - stop_times_df = helpers.import_scheduled_stop_times( - date, - filters=[[("schedule_gtfs_dataset_key", "in", gtfs_schedule_key)]], - columns=stop_times_col, - get_pandas=True, - with_direction=True, - ) - - stop_times_df["service_date"] = date - return stop_times_df - -def load_scheduled_trips(date: str, gtfs_schedule_key: list) -> pd.DataFrame: - scheduled_col = [ - "route_id", - "trip_instance_key", - "gtfs_dataset_key", - "shape_array_key", - "direction_id", - "route_long_name", - "route_short_name", - "route_desc", - "name" - ] - - scheduled_trips_df = helpers.import_scheduled_trips( - date, - filters=[[("gtfs_dataset_key", "in", gtfs_schedule_key)]], - columns=scheduled_col, - ) - - scheduled_trips_df["service_date"] = date - return scheduled_trips_df - - -def find_most_common_dir( - scheduled_trips_df: pd.DataFrame, - scheduled_stop_times_df: pd.DataFrame, -) -> pd.DataFrame: - """ - Load load_scheduled_trips() and load_scheduled_stop_times() - """ - - # Merge dfs - merge_cols = [ - "trip_instance_key", - "schedule_gtfs_dataset_key", - "shape_array_key", - "service_date", - ] - - df = delayed(pd.merge)( - scheduled_trips_df, - scheduled_stop_times_df, - on=merge_cols, - how="inner", - ) - - agg1 = ( - df.groupby( - [ - "route_id", - "schedule_gtfs_dataset_key", - "direction_id", - "stop_primary_direction", - "service_date", - ] - ) - .agg({"stop_sequence": "count"}) - .reset_index() - .rename(columns={"stop_sequence": "total_stops"}) - ) - - # Sort and drop duplicates so that the - # largest # of stops by stop_primary_direction is at the top - agg2 = agg1.sort_values( - by=["route_id", - "schedule_gtfs_dataset_key", - "direction_id", - "service_date", - "total_stops"], - ascending=[True, True, True, True, False], - ) - - # Drop duplicates so only the top stop_primary_direction is kept. - agg3 = agg2.drop_duplicates( - subset=[ - "route_id", - "schedule_gtfs_dataset_key", - "direction_id", - "service_date" - ] - ).reset_index(drop=True) - - agg3 = agg3.drop(columns=["total_stops"]) - return agg3 - - -def most_recent_route_info( - df: pd.DataFrame, - group_cols: list, - route_col: str -) -> pd.DataFrame: - """ - Find the most recent value across a grouping. - Ex: if we group by route_id, we can find the most recent - value for route_long_name. - - Needs a date column to work. - """ - sort_order = [True for c in group_cols] - - most_recent = (df.sort_values(group_cols + ["service_date"], - ascending = sort_order + [False]) - .drop_duplicates(subset = group_cols) - .rename(columns = {route_col: f"recent_{route_col}"}) - ) - - - df2 = delayed(pd.merge)( - df, - most_recent[group_cols + [f"recent_{route_col}"]], - on = group_cols, - how = "left" - ) - return most_recent - -def find_most_recent_route_id(df): - df = df.assign( - route_id=df.route_id.fillna(""), - route_short_name=df.route_short_name.fillna(""), - route_long_name=df.route_long_name.fillna(""), - ) - df = df.assign(combined_name=df.route_short_name + "__" + df.route_long_name) - - df = df.assign( - route_id2=df.apply( - lambda x: gtfs_schedule_wrangling.standardize_route_id( - x, "name", "route_id" - ), - axis=1, - ) - ) - - route_cols = ["schedule_gtfs_dataset_key", "name", "route_id2"] - - df2 =most_recent_route_info( - df, group_cols=route_cols, route_col="combined_name" - ).pipe( - most_recent_route_info, - group_cols=["schedule_gtfs_dataset_key", "name", "recent_combined_name"], - route_col="route_id2", - ) - - to_keep_cols = ["schedule_gtfs_dataset_key", "route_id","service_date", "recent_route_id2"] - df2 = df2[to_keep_cols] - return df2 - -def find_cardinal_direction(date:str, gtfs_schedule_keys: list) -> pd.DataFrame: - # Grab all available dates for these dataframes - # Load the 2 dataframes - scheduled_trips_dd = delayed(load_scheduled_trips(date, gtfs_schedule_keys)) - scheduled_stops_dd = delayed(load_scheduled_stop_times(date, gtfs_schedule_keys)) - - # Find the most common direction for this Route ID - common_stops_dd = find_most_common_dir(scheduled_trips_dd, scheduled_stops_dd) - - # Find the most recent Route ID to connect back to sched_vp_df - recent_ids_dd = find_most_recent_route_id(scheduled_trips_dd) - - # Merge this - m1 = delayed(pd.merge)( - common_stops_dd, - recent_ids_dd, - on=["schedule_gtfs_dataset_key", "route_id", "service_date"], - how="inner", - ) - - m1 = m1.drop(columns = ["route_id"]) - - return m1 - -def all_dates_cardinal_dir(dates:list, gtfs_schedule_keys:list)->pd.DataFrame: - full_df = pd.DataFrame() - for date in dates: - df = find_cardinal_direction(date, gtfs_schedule_keys) - df = df.compute() - full_df = pd.concat([full_df, df], axis=0) - to_keep = ["schedule_gtfs_dataset_key", - "direction_id", - "recent_route_id2", - "stop_primary_direction", - "service_date"] - full_df = full_df[to_keep] - return full_df - def load_schedule_vp_metrics(organization:str)->pd.DataFrame: schd_vp_url = f"{GTFS_DATA_DICT.digest_tables.dir}{GTFS_DATA_DICT.digest_tables.route_schedule_vp}.parquet" From 48889679fecc4e3046584256a5b21a8a6fd63b06 Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Wed, 19 Jun 2024 16:44:55 +0000 Subject: [PATCH 3/3] deleted old files --- portfolio/gtfs_digest_testing/README.md | 25 ------------------- .../gtfs_digest_testing/district_01-eureka.md | 1 - .../district_02-redding.md | 1 - .../district_03-marysville.md | 1 - .../district_04-oakland.md | 1 - .../district_07-los-angeles.md | 1 - 6 files changed, 30 deletions(-) delete mode 100644 portfolio/gtfs_digest_testing/README.md delete mode 100644 portfolio/gtfs_digest_testing/district_01-eureka.md delete mode 100644 portfolio/gtfs_digest_testing/district_02-redding.md delete mode 100644 portfolio/gtfs_digest_testing/district_03-marysville.md delete mode 100644 portfolio/gtfs_digest_testing/district_04-oakland.md delete mode 100644 portfolio/gtfs_digest_testing/district_07-los-angeles.md diff --git a/portfolio/gtfs_digest_testing/README.md b/portfolio/gtfs_digest_testing/README.md deleted file mode 100644 index 7780d50e5..000000000 --- a/portfolio/gtfs_digest_testing/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# GTFS Digest -This portfolio houses performance metrics from GTFS schedule and vehicle positions time-series data for all transit operators by route. - -To download our processed full data that powers this portfolio, please navigate to the folder titled `gtfs_digest` [here](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis). You will find the most recent datasets in `.parquet, .csv,.geojson` formats. Match the [readable column names](https://github.com/cal-itp/data-analyses/blob/main/gtfs_digest/readable.yml) to the table names. The data pulled from the Federal Transit Administration's National Transit Data is located [here](https://www.transit.dot.gov/ntd/data-product/2022-annual-database-agency-information). -## Common Questions -To read about the methodology, please visit [here](https://github.com/cal-itp/data-analyses/blob/main/gtfs_digest/methodology.md).
-**Why is time-series table sampling single days?** - -GTFS provides us with extremely detailed information, such as the time a bus is scheduled to arrive at a stop, and the GPS coordinates of a bus at a given timestamp. When working with granular data like this, a single day statewide can be a very large table. - -For context, on our sampled date in January 2024 there were 100k+ trips and 3.6 million+ stop arrivals, and that's just scheduled data. Our vehicle positions table, after deduplicating in our warehouse, had 15 million+ rows. On top of that, each operator can have a quartet of GTFS data (1 schedule table + 3 real-time tables). - -Getting our pipeline right is fairly complex for a single day. Our warehouse has a set of internal keys to ensure we're matching trip for trip across quartets. If you factor in the fact that operators can update their GTFS feeds at any time in the month, there are a lot of things that are changing! - -We do have monthly aggregations on our roadmap, but for now, we're building out our own time-series tables of processed data, and working through the kinks of being able to track the same route over time (as feeds get updated, identifiers change, etc). We will be starting with schedule data to figure out how to produce monthly aggregations in a scalable way. - -**How does GTFS Digest fit into SB 125 performance metrics?** - -[SB 125](https://calsta.ca.gov/subject-areas/sb125-transit-program) and the creation of the Transit Transformation Task Force has a section on creating performance metrics for transit operators statewide. Dive into the [legislative bill](https://legiscan.com/CA/text/SB125/id/2831757). - -The Caltrans Division of Data & Digital Services has been ingesting and collecting GTFS data in our warehouse since 2021. Our own internal effort has been to create data pipelines so that the rich and comprehensive data we collect can be processed and made available for public consumption. - -There overlaps with the goals of SB 125. There are a set of performance metrics that could be of interest to the task force, the public, and us! However, GTFS Digest is a **GTFS** digest, which means its primary focus is on metrics that can be derived purely from GTFS, and to do it statewide so we can understand transit operator performance. We based a lot of our metrics on the papers by [Professor Gregory Newmark](https://www.morgan.edu/sap/gregory-newmark) that gave us a roadmap of metrics that could be derived solely from GTFS that would create comparisons of transit operators regardless of size, service area and density. - -GTFS Digest will continue to evolve as we dive into our own warehouse! diff --git a/portfolio/gtfs_digest_testing/district_01-eureka.md b/portfolio/gtfs_digest_testing/district_01-eureka.md deleted file mode 100644 index f0060fc25..000000000 --- a/portfolio/gtfs_digest_testing/district_01-eureka.md +++ /dev/null @@ -1 +0,0 @@ -# District 01 - Eureka \ No newline at end of file diff --git a/portfolio/gtfs_digest_testing/district_02-redding.md b/portfolio/gtfs_digest_testing/district_02-redding.md deleted file mode 100644 index 128b1bbf9..000000000 --- a/portfolio/gtfs_digest_testing/district_02-redding.md +++ /dev/null @@ -1 +0,0 @@ -# District 02 - Redding \ No newline at end of file diff --git a/portfolio/gtfs_digest_testing/district_03-marysville.md b/portfolio/gtfs_digest_testing/district_03-marysville.md deleted file mode 100644 index bab692d75..000000000 --- a/portfolio/gtfs_digest_testing/district_03-marysville.md +++ /dev/null @@ -1 +0,0 @@ -# District 03 - Marysville \ No newline at end of file diff --git a/portfolio/gtfs_digest_testing/district_04-oakland.md b/portfolio/gtfs_digest_testing/district_04-oakland.md deleted file mode 100644 index 1faedbdab..000000000 --- a/portfolio/gtfs_digest_testing/district_04-oakland.md +++ /dev/null @@ -1 +0,0 @@ -# District 04 - Oakland \ No newline at end of file diff --git a/portfolio/gtfs_digest_testing/district_07-los-angeles.md b/portfolio/gtfs_digest_testing/district_07-los-angeles.md deleted file mode 100644 index 79901aa5a..000000000 --- a/portfolio/gtfs_digest_testing/district_07-los-angeles.md +++ /dev/null @@ -1 +0,0 @@ -# District 07 - Los Angeles \ No newline at end of file