diff --git a/gtfs_digest/deploy_portfolio_yaml.py b/gtfs_digest/deploy_portfolio_yaml.py index 6537b2070..88db032b0 100644 --- a/gtfs_digest/deploy_portfolio_yaml.py +++ b/gtfs_digest/deploy_portfolio_yaml.py @@ -6,7 +6,7 @@ from segment_speed_utils.project_vars import RT_SCHED_GCS -PORTFOLIO_SITE_YAML = Path("../portfolio/sites/gtfs_digest.yml") +PORTFOLIO_SITE_YAML = Path("../portfolio/sites/digest_typologies.yml") def overwrite_yaml(portfolio_site_yaml: Path) -> list: """ @@ -29,7 +29,19 @@ def overwrite_yaml(portfolio_site_yaml: Path) -> list: districts = sorted(list(df.caltrans_district.unique())) - operators = df.organization_name.tolist() + operators = df.organization_name.tolist() + + keep_me = ["City of Santa Monica", "City of Culver City", + "Long Beach Transit", "Southern California Regional Rail Authority", + "Foothill Transit", + "Alameda-Contra Costa Transit District", + "City and County of San Francisco", + "Sonoma-Marin Area Rail Transit District", + "Marin County Transit District", + ] + operators = [i for i in operators if i in keep_me] + districts = sorted(df[df.organization_name.isin(keep_me)].caltrans_district.unique()) + # Eric's example # https://github.com/cal-itp/data-analyses/blob/main/rt_delay/04_generate_all.ipynb diff --git a/gtfs_digest/extra_cleaning.py b/gtfs_digest/extra_cleaning.py deleted file mode 100644 index b27893639..000000000 --- a/gtfs_digest/extra_cleaning.py +++ /dev/null @@ -1,28 +0,0 @@ -# operators that need route names parsed -# probably just keep the long name portion -# also do titlecase? 
this is finicky, because some are CC (which we don't want titlecase) -operators_only_route_long_name = [ - "Antelope Valley Transit Authority Schedule", - "Bay Area 511 ACE Schedule", - "Bay Area 511 Caltrain Schedule", - "Bay Area 511 Emery Go-Round Schedule", - "Bay Area 511 Petaluma Schedule", - "Beach Cities GMV Schedule", - "Bear Schedule", - "Commerce Schedule", - "Elk Grove Schedule", - "Humboldt Schedule", - "LA DOT Schedule", - "Lawndale Beat GMV Schedule", - "Redding Schedule", - "Redwood Coast Schedule", - "Santa Maria Schedule", - "StanRTA Schedule", - "VCTC GMV Schedule", - "Victor Valley GMV Schedule", - "Visalia Schedule", - "Yolobus Schedule", -] - -# BruinBus Schedule - nothing shows up in route stats -# Why does "StanRTA Schedule" and "Tahoe Transportation District Schedule" appear to have similar route names? \ No newline at end of file diff --git a/gtfs_digest/merge_data.py b/gtfs_digest/merge_data.py index 3840ca992..7da589ce9 100644 --- a/gtfs_digest/merge_data.py +++ b/gtfs_digest/merge_data.py @@ -136,54 +136,37 @@ def concatenate_crosswalk_organization( def merge_in_standardized_route_names( df: pd.DataFrame, ) -> pd.DataFrame: + keep_cols = [ "schedule_gtfs_dataset_key", "name", "route_id", "service_date", - "recent_route_id2", "recent_combined_name"] + ] CLEAN_ROUTES = GTFS_DATA_DICT.schedule_tables.route_identification - operators_need_cleaning = pd.read_parquet( - f"{SCHED_GCS}{CLEAN_ROUTES}.parquet", - filters = [[("name", "in", operators_only_route_long_name)]] - ) - operators_ok = pd.read_parquet( - f"{SCHED_GCS}{CLEAN_ROUTES}.parquet", - filters = [[("name", "not in", operators_only_route_long_name)]] - ) + route_names_df = pd.read_parquet(f"{SCHED_GCS}{CLEAN_ROUTES}.parquet") - operators_need_cleaning = operators_need_cleaning.assign( - recent_combined_name = operators_need_cleaning.route_long_name - ) + route_names_df = time_series_utils.clean_standardized_route_names( + route_names_df).drop_duplicates() - standardized_route_names 
= pd.concat([ - operators_need_cleaning, - operators_ok - ], axis=0, ignore_index=True)[keep_cols] - if "name" in df.columns: df = df.drop(columns = "name") - df = pd.merge( + # Use `route_id` to merge to standardized_route_names + df2 = pd.merge( df, - standardized_route_names, + route_names_df, on = ["schedule_gtfs_dataset_key", "route_id", "service_date"], how = "left", ) - df = df.assign( - recent_combined_name = df.recent_combined_name.str.replace("__", " ") - ).drop( - columns = ["route_id"] - ).rename( - columns = { - "recent_route_id2": "route_id", - "recent_combined_name": "route_combined_name" - } - ) + # After merging, we can replace route_id with recent_route_id2 + drop_cols = ["route_desc", "combined_name", "route_id2"] + df3 = time_series_utils.parse_route_combined_name(df2).drop( + columns = drop_cols) - return df + return df3 if __name__ == "__main__": @@ -237,7 +220,7 @@ def merge_in_standardized_route_names( "is_early", "is_ontime", "is_late" ] - df[integrify] = df[integrify].astype("Int64") + df[integrify] = df[integrify].fillna(0).astype("int") df.to_parquet( f"{RT_SCHED_GCS}{DIGEST_RT_SCHED}.parquet" diff --git a/gtfs_digest/readable.yml b/gtfs_digest/readable.yml new file mode 100644 index 000000000..6e2982733 --- /dev/null +++ b/gtfs_digest/readable.yml @@ -0,0 +1,66 @@ +# GTFS schedule +direction_id: + readable: Direction + caption: Something +avg_scheduled_service_minutes: "Average Scheduled Service (trip minutes)" +avg_stop_miles: Average Stop Distance (miles) +ttl_service_hours: total service (hours) +n_scheduled_trips: "# scheduled trips" +frequency: Trips per Hour +total_scheduled_service_minutes: "Aggregate Scheduled Service Minutes (all trips)" +route_id: Route +route_combined_name: Route + +# GTFS vehicle positions +minutes_atleast1_vp: "# minutes with 1+ vp per minute" +minutes_atleast2_vp: "# minutes with 2+ vp per minute" +total_rt_service_minutes: "Aggregate Actual Service Minutes (all trips)" +total_vp: "# vp" +vp_in_shape: "# 
vp within scheduled shape" +is_early: "# early arrival trips" +is_ontime: "# on-time trips" +is_late: "# late trips" +n_vp_trips: "# trips with vp" +vp_per_minute: "Average vp per minute" +pct_in_shape: "% vp within scheduled shape" +pct_rt_journey_atleast1_vp: "% actual trip minutes with 1+ vp per minute" +pct_rt_journey_atleast2_vp: "% actual trip minutes with 2+ vp per minute" +pct_sched_journey_atleast1_vp: "% scheduled trip minutes with 1+ vp per minute" +pct_sched_journey_atleast2_vp: "% scheduled trip minutes with 2+ vp per minute" +rt_sched_journey_ratio: "Actual / scheduled service ratio" +avg_rt_service_minutes: "Average Actual Service (trip minutes)" +speed_mph: "Speed (mph)" +sched_rt_category: "GTFS Availability" + +# Operators +organization_source_record_id: "Organization ID" +organization_name: Organization +name: Transit Operator +caltrans_district: District +base64_url: "base64 encoded feed URL" +operator_n_routes: "# routes" +operator_n_trips: "# trips" +operator_n_shapes: "# shapes" +operator_n_stops: "# stops" +operator_n_arrivals: "# arrivals" +operator_route_length_miles: "service miles" +operator_arrivals_per_stop: "avg arrivals per stop" +n_coverage_routes: "# coverage route types" +n_downtown_local_routes: "# downtown local route types" +n_local_routes: "# local route types" +n_rapid_routes: "# rapid route types" + + +# Dates / time +time_period: Period +service_date: Date +time_of_day: "time of day" +day_name: "Day of Week" +year: Year +month: Month +year_month: "Month - Year" + +# Roads +road_freq_category: "Road Frequency Category" +road_typology: "Road Typology Category" +pct_typology: "% route miles with typology" \ No newline at end of file diff --git a/gtfs_funnel/concatenate_monthly_scheduled_service.py b/gtfs_funnel/concatenate_monthly_scheduled_service.py index d49f70db9..85596e3fb 100644 --- a/gtfs_funnel/concatenate_monthly_scheduled_service.py +++ b/gtfs_funnel/concatenate_monthly_scheduled_service.py @@ -14,7 +14,8 @@ 
MONTHLY_SERVICE = GTFS_DATA_DICT.schedule_tables.monthly_scheduled_service CROSSWALK = GTFS_DATA_DICT.schedule_tables.gtfs_key_crosswalk - + ROUTES = GTFS_DATA_DICT.schedule_tables.route_identification + year_list = [2023, 2024] analysis_date_list = (rt_dates.y2024_dates + rt_dates.y2023_dates + @@ -47,11 +48,24 @@ "schedule_source_record_id", "schedule_gtfs_dataset_key", "organization_source_record_id", "organization_name", - ], + ] ).drop( columns = "service_date" - ).drop_duplicates().reset_index(drop=True) + ).drop_duplicates().reset_index(drop=True) + + # Get standardized route names and clean up more + standardized_routes = pd.read_parquet(f"{SCHED_GCS}{ROUTES}.parquet") + + route_names_df = time_series_utils.clean_standardized_route_names( + standardized_routes).pipe( + time_series_utils.parse_route_combined_name + )[ + ["schedule_gtfs_dataset_key", + "route_long_name", "route_short_name", + "route_id", "route_combined_name"] + ].drop_duplicates() + # Merge monthly service with crosswalk to get schedule_gtfs_dataset_key df2 = pd.merge( df, crosswalk, @@ -59,6 +73,15 @@ how = "inner", ) - df2.to_parquet( + # Merge in route_names so we use the standardized/cleaned up route names + df3 = pd.merge( + df2, + route_names_df, + on = ["schedule_gtfs_dataset_key", + "route_long_name", "route_short_name"], + how = "left", + ) + + df3.to_parquet( f"{SCHED_GCS}{MONTHLY_SERVICE}.parquet" ) \ No newline at end of file diff --git a/portfolio/digest_typologies/README.md b/portfolio/digest_typologies/README.md new file mode 100644 index 000000000..4aca9d8a1 --- /dev/null +++ b/portfolio/digest_typologies/README.md @@ -0,0 +1,5 @@ +# GTFS Digest + +Performance metrics from GTFS schedule and vehicle positions time-series data for all transit operators by route. 
+ +[data:image/s3,"s3://crabby-images/062e8/062e8da88ea13e7ff1671ff2d71d08901b045dc3" alt="digest_mermaid"](https://mermaid.live/edit#pako:eNqVk1Fv2jAQx7-K5QnlJUGEhAKeNKkU2NOeWu1hZIpMfAGrjh3Zl7WA-O5zknXQba20PDin-_98Z9-dT7QwAiijURRlGiUqYOTzw_qeLOUOHGa6EwaDU6YJkVoiI51JSIB7qCBgJNhyB0F47f3KreRbBS74jXuptrLi9nBnlLHtvg-ryXqyvn3ZeiEe4Bkv1Gg0-htZGCvAvgUpqeEtzUFhtHh9jvV6ulpcMQgW5SukLMugl8_tzy_nwSDTmS6VeSr23CJ5WPSAa7Y7y-s9saZBiIS0UKA0ulf7tVDcuSWURJSklEqxX7X4Q3WFlTW-EN0pr6O03-3GFXsQjQJSAVpZuO-MMR82ij4R0TXx44VebFxTtfUjrgYQ_2CvQt9tLOY_XP4_CZabHZYuf4RDjiY3dse1PPL2-qSwxrknrh7f27_aOORte4Q8gsi7EuaaV_Bu1t4CLS4uGtIKbMWl8NPdzWBGu9nMKPOmgJI3CjPqG-lR3qC5P-iCMrQNhLSpBUdYSu47WVFWcuW8F4REY7_0L6Z7OCGtuabsRJ8pi2bjYTJPxvM0mc6S-ThOQ3qgLLkZxulklt6kaTwZzdL4HNKjMT5qPBzHfjZH8TRJPJ1O0y7ct07sUp5_AvaOGGY) \ No newline at end of file diff --git a/portfolio/digest_typologies/_config.yml b/portfolio/digest_typologies/_config.yml new file mode 100644 index 000000000..89e63588f --- /dev/null +++ b/portfolio/digest_typologies/_config.yml @@ -0,0 +1,43 @@ +# Book settings +# Learn more at https://jupyterbook.org/customize/config.html + +title: Transit Operators and Route Typologies +author: Cal-ITP +copyright: "2024" +#logo: calitp_logo_MAIN.png + +# Force re-execution of notebooks on each build. 
+# See https://jupyterbook.org/content/execute.html +execute: + execute_notebooks: 'off' + allow_errors: false + timeout: -1 + +# Define the name of the latex output file for PDF builds +latex: + latex_documents: + targetname: book.tex + +launch_buttons: + binderhub_url: "https://mybinder.org" + jupyterhub_url: "https://hubtest.k8s.calitp.jarv.us" + thebe: true + +repository: + url: https://github.com/cal-itp/data-analyses/ # Online location of your book +# path_to_book: docs # Optional path to your book, relative to the repository root + path_to_book: gtfs_digest + branch: main # Which branch of the repository should be used when creating links (optional) + +# Add GitHub buttons to your book +# See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository +html: + use_issues_button: true + use_repository_button: true + use_edit_page_button: true + google_analytics_id: 'G-JCX3Z8JZJC' + +sphinx: + config: + html_js_files: + - https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js \ No newline at end of file diff --git a/portfolio/digest_typologies/_toc.yml b/portfolio/digest_typologies/_toc.yml new file mode 100644 index 000000000..7ec7bfc5c --- /dev/null +++ b/portfolio/digest_typologies/_toc.yml @@ -0,0 +1,11 @@ +format: jb-book +parts: +- caption: null + chapters: + - file: district_04-oakland.md + sections: + - glob: district_04-oakland/* + - file: district_07-los-angeles.md + sections: + - glob: district_07-los-angeles/* +root: README diff --git a/portfolio/digest_typologies/district_04-oakland.md b/portfolio/digest_typologies/district_04-oakland.md new file mode 100644 index 000000000..1faedbdab --- /dev/null +++ b/portfolio/digest_typologies/district_04-oakland.md @@ -0,0 +1 @@ +# District 04 - Oakland \ No newline at end of file diff --git a/portfolio/digest_typologies/district_04-oakland/0__typologies__district_04-oakland__name_alameda-contra-costa-transit-district.ipynb 
b/portfolio/digest_typologies/district_04-oakland/0__typologies__district_04-oakland__name_alameda-contra-costa-transit-district.ipynb new file mode 100644 index 000000000..c2a2bd74e --- /dev/null +++ b/portfolio/digest_typologies/district_04-oakland/0__typologies__district_04-oakland__name_alameda-contra-costa-transit-district.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1276c09765845621b2dc6bb6675d61d097fd8a6ec6fb14da638a47026bab5a4 +size 12303490 diff --git a/portfolio/digest_typologies/district_04-oakland/1__typologies__district_04-oakland__name_central-contra-costa-transit-authority.ipynb b/portfolio/digest_typologies/district_04-oakland/1__typologies__district_04-oakland__name_central-contra-costa-transit-authority.ipynb new file mode 100644 index 000000000..ec6ba3b25 --- /dev/null +++ b/portfolio/digest_typologies/district_04-oakland/1__typologies__district_04-oakland__name_central-contra-costa-transit-authority.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506dc5a99b5aa462e3b4e3ab2ec9012d575c6417a05a38549e6b8f2b2f1066ee +size 3976205 diff --git a/portfolio/digest_typologies/district_04-oakland/2__typologies__district_04-oakland__name_city-and-county-of-san-francisco.ipynb b/portfolio/digest_typologies/district_04-oakland/2__typologies__district_04-oakland__name_city-and-county-of-san-francisco.ipynb new file mode 100644 index 000000000..5d070b04e --- /dev/null +++ b/portfolio/digest_typologies/district_04-oakland/2__typologies__district_04-oakland__name_city-and-county-of-san-francisco.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606bbf751b2557e15390740232a686baf88106fe0a8f9ea38f6ba4967ada83cb +size 3285371 diff --git a/portfolio/digest_typologies/district_04-oakland/3__typologies__district_04-oakland__name_eastern-contra-costa-transit-authority.ipynb 
b/portfolio/digest_typologies/district_04-oakland/3__typologies__district_04-oakland__name_eastern-contra-costa-transit-authority.ipynb new file mode 100644 index 000000000..677c41fcb --- /dev/null +++ b/portfolio/digest_typologies/district_04-oakland/3__typologies__district_04-oakland__name_eastern-contra-costa-transit-authority.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b9247a3914d486cba04475280240c715bbb9dd77c4296581586b65c1131bfd9 +size 1193675 diff --git a/portfolio/digest_typologies/district_04-oakland/4__typologies__district_04-oakland__name_marin-county-transit-district.ipynb b/portfolio/digest_typologies/district_04-oakland/4__typologies__district_04-oakland__name_marin-county-transit-district.ipynb new file mode 100644 index 000000000..ceaae43e2 --- /dev/null +++ b/portfolio/digest_typologies/district_04-oakland/4__typologies__district_04-oakland__name_marin-county-transit-district.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7156f50617a1067405dd4ec39a396a6948415b26f89b80f14fba2ee5231a2d5 +size 1926753 diff --git a/portfolio/digest_typologies/district_04-oakland/5__typologies__district_04-oakland__name_santa-clara-valley-transportation-authority.ipynb b/portfolio/digest_typologies/district_04-oakland/5__typologies__district_04-oakland__name_santa-clara-valley-transportation-authority.ipynb new file mode 100644 index 000000000..b0affecd0 --- /dev/null +++ b/portfolio/digest_typologies/district_04-oakland/5__typologies__district_04-oakland__name_santa-clara-valley-transportation-authority.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b8b1172c30472762b20fe87bbb16653c30e7f21c38114606af41b087c97e2c +size 4497864 diff --git a/portfolio/digest_typologies/district_04-oakland/6__typologies__district_04-oakland__name_sonoma-county.ipynb b/portfolio/digest_typologies/district_04-oakland/6__typologies__district_04-oakland__name_sonoma-county.ipynb new file mode 
100644 index 000000000..32fe043e6 --- /dev/null +++ b/portfolio/digest_typologies/district_04-oakland/6__typologies__district_04-oakland__name_sonoma-county.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d2cdf6ca57f45fb00c5dedbce8287fba685f851e14157d7eb024a15792d5a0 +size 928009 diff --git a/portfolio/digest_typologies/district_07-los-angeles.md b/portfolio/digest_typologies/district_07-los-angeles.md new file mode 100644 index 000000000..79901aa5a --- /dev/null +++ b/portfolio/digest_typologies/district_07-los-angeles.md @@ -0,0 +1 @@ +# District 07 - Los Angeles \ No newline at end of file diff --git a/portfolio/digest_typologies/district_07-los-angeles/0__typologies__district_07-los-angeles__name_city-of-culver-city.ipynb b/portfolio/digest_typologies/district_07-los-angeles/0__typologies__district_07-los-angeles__name_city-of-culver-city.ipynb new file mode 100644 index 000000000..b54bfae5e --- /dev/null +++ b/portfolio/digest_typologies/district_07-los-angeles/0__typologies__district_07-los-angeles__name_city-of-culver-city.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f53075c3f2f59b845694cf8b0074e4b51210b0175f7aa9f3102812f9e1bddcdd +size 917899 diff --git a/portfolio/digest_typologies/district_07-los-angeles/1__typologies__district_07-los-angeles__name_city-of-los-angeles.ipynb b/portfolio/digest_typologies/district_07-los-angeles/1__typologies__district_07-los-angeles__name_city-of-los-angeles.ipynb new file mode 100644 index 000000000..ab90ab766 --- /dev/null +++ b/portfolio/digest_typologies/district_07-los-angeles/1__typologies__district_07-los-angeles__name_city-of-los-angeles.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ce36d0c5af0424b262b29cc0e3186d0938ef9339c1dc83455b0df988f3355e +size 2841414 diff --git a/portfolio/digest_typologies/district_07-los-angeles/2__typologies__district_07-los-angeles__name_city-of-santa-monica.ipynb 
b/portfolio/digest_typologies/district_07-los-angeles/2__typologies__district_07-los-angeles__name_city-of-santa-monica.ipynb new file mode 100644 index 000000000..adbcd9571 --- /dev/null +++ b/portfolio/digest_typologies/district_07-los-angeles/2__typologies__district_07-los-angeles__name_city-of-santa-monica.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f441f04cbd5695e09431c1236cb156cde07217b709ba0d7631fe5ddfce22df +size 1119444 diff --git a/portfolio/digest_typologies/district_07-los-angeles/3__typologies__district_07-los-angeles__name_foothill-transit.ipynb b/portfolio/digest_typologies/district_07-los-angeles/3__typologies__district_07-los-angeles__name_foothill-transit.ipynb new file mode 100644 index 000000000..90f0e7a58 --- /dev/null +++ b/portfolio/digest_typologies/district_07-los-angeles/3__typologies__district_07-los-angeles__name_foothill-transit.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b432855e8750c47c8758130b947af4b78429d096da6928436299179f2b9fe1e +size 1439613 diff --git a/portfolio/digest_typologies/district_07-los-angeles/4__typologies__district_07-los-angeles__name_long-beach-transit.ipynb b/portfolio/digest_typologies/district_07-los-angeles/4__typologies__district_07-los-angeles__name_long-beach-transit.ipynb new file mode 100644 index 000000000..2b33c1fc3 --- /dev/null +++ b/portfolio/digest_typologies/district_07-los-angeles/4__typologies__district_07-los-angeles__name_long-beach-transit.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b043b8a033cc5d219362d984759971bb6655f284def9435878f559b0d0c72c3f +size 2798382 diff --git a/portfolio/digest_typologies/district_07-los-angeles/5__typologies__district_07-los-angeles__name_los-angeles-county-metropolitan-transportation-authority.ipynb b/portfolio/digest_typologies/district_07-los-angeles/5__typologies__district_07-los-angeles__name_los-angeles-county-metropolitan-transportation-authority.ipynb 
new file mode 100644 index 000000000..2ea04f903 --- /dev/null +++ b/portfolio/digest_typologies/district_07-los-angeles/5__typologies__district_07-los-angeles__name_los-angeles-county-metropolitan-transportation-authority.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c75d92ac12f4c899139b1c3146098e15f8ff0be9ea1ff160bb992cda77011d9f +size 8359278 diff --git a/portfolio/digest_typologies/district_07-los-angeles/6__typologies__district_07-los-angeles__name_university-of-california-los-angeles.ipynb b/portfolio/digest_typologies/district_07-los-angeles/6__typologies__district_07-los-angeles__name_university-of-california-los-angeles.ipynb new file mode 100644 index 000000000..6d2b8e7f9 --- /dev/null +++ b/portfolio/digest_typologies/district_07-los-angeles/6__typologies__district_07-los-angeles__name_university-of-california-los-angeles.ipynb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8536176aae1e3fd2c2528e0f412e358608338ce546c720df240a4a67bd5da311 +size 313182 diff --git a/portfolio/sites/digest_typologies.yml b/portfolio/sites/digest_typologies.yml new file mode 100644 index 000000000..0baba7a4c --- /dev/null +++ b/portfolio/sites/digest_typologies.yml @@ -0,0 +1,28 @@ +directory: ./gtfs_digest/ +notebook: ./gtfs_digest/typologies.ipynb +parts: +- chapters: + - caption: District 04 - Oakland + params: + district: 04 - Oakland + sections: + - name: Alameda-Contra Costa Transit District + - name: Central Contra Costa Transit Authority + - name: City and County of San Francisco + - name: Eastern Contra Costa Transit Authority + - name: Marin County Transit District + - name: Santa Clara Valley Transportation Authority + - name: Sonoma County + - caption: District 07 - Los Angeles + params: + district: 07 - Los Angeles + sections: + - name: City of Culver City + - name: City of Los Angeles + - name: City of Santa Monica + - name: Foothill Transit + - name: Long Beach Transit + - name: Los Angeles County 
Metropolitan Transportation Authority + - name: University of California, Los Angeles +readme: ./gtfs_digest/README.md +title: Transit Operators and Route Typologies diff --git a/rt_segment_speeds/segment_speed_utils/time_series_utils.py b/rt_segment_speeds/segment_speed_utils/time_series_utils.py index 949ca85ac..709fa9936 100644 --- a/rt_segment_speeds/segment_speed_utils/time_series_utils.py +++ b/rt_segment_speeds/segment_speed_utils/time_series_utils.py @@ -12,7 +12,7 @@ from typing import Literal from segment_speed_utils import helpers -from segment_speed_utils.project_vars import SEGMENT_GCS +from segment_speed_utils.project_vars import SCHED_GCS, SEGMENT_GCS fs = gcsfs.GCSFileSystem() @@ -60,4 +60,68 @@ def concatenate_datasets_across_dates( if get_pandas: df = compute(df)[0] - return df \ No newline at end of file + return df + + +def clean_standardized_route_names( + df: pd.DataFrame, +) -> pd.DataFrame: + """ + Clean up route names for operators that need + additional parsing. Just keep the route_long_name instead of + combining it with route_short_name. + + TODO: do titlecase? 
+ this is finicky, because some are CC (which we don't want titlecase) + """ + df_need_cleaning = df.loc[df.name.isin(operators_only_route_long_name)] + + df_ok = df.loc[~df.name.isin(operators_only_route_long_name)] + + df_need_cleaning = df_need_cleaning.assign( + recent_combined_name = df_need_cleaning.route_long_name + ) + + df2 = pd.concat([ + df_need_cleaning, + df_ok + ], axis=0, ignore_index=True) + + return df2 + +def parse_route_combined_name(df): + df = df.assign( + recent_combined_name = df.recent_combined_name.str.replace("__", " ") + ).drop( + columns = ["route_id"] + ).rename( + columns = { + "recent_route_id2": "route_id", + "recent_combined_name": "route_combined_name" + } + ) + + return df + +operators_only_route_long_name = [ + "Antelope Valley Transit Authority Schedule", + "Bay Area 511 ACE Schedule", + "Bay Area 511 Caltrain Schedule", + "Bay Area 511 Emery Go-Round Schedule", + "Bay Area 511 Petaluma Schedule", + "Beach Cities GMV Schedule", + "Bear Schedule", + "Commerce Schedule", + "Elk Grove Schedule", + "Humboldt Schedule", + "LA DOT Schedule", + "Lawndale Beat GMV Schedule", + "Redding Schedule", + "Redwood Coast Schedule", + "Santa Maria Schedule", + "StanRTA Schedule", + "VCTC GMV Schedule", + "Victor Valley GMV Schedule", + "Visalia Schedule", + "Yolobus Schedule", +] \ No newline at end of file