@@ -8,7 +8,7 @@
 
 from calitp_data_analysis import utils
 from segment_speed_utils import time_series_utils
-from shared_utils import publish_utils
+from shared_utils import gtfs_utils_v2, publish_utils
 from merge_data import merge_in_standardized_route_names
 from update_vars import GTFS_DATA_DICT, SCHED_GCS, RT_SCHED_GCS
 
@@ -120,6 +120,7 @@ def operator_category_counts_by_date() -> pd.DataFrame:
 
     ntd_cols = [
         "schedule_gtfs_dataset_key",
+        "caltrans_district",
         "counties_served",
         "service_area_sq_miles",
         "hq_city",
@@ -153,13 +154,14 @@ def operator_category_counts_by_date() -> pd.DataFrame:
     )
 
     # Drop duplicates created after merging
-    op_profiles_df2 = (op_profiles_df1
-        .pipe(
-            publish_utils.exclude_private_datasets,
-            col = "schedule_gtfs_dataset_key",
-            public_gtfs_dataset_keys = public_feeds
-        ).drop_duplicates(subset = list(op_profiles_df1.columns))
-        .reset_index(drop = True))
+    op_profiles_df2 = (
+        op_profiles_df1
+        .pipe(
+            publish_utils.exclude_private_datasets,
+            col = "schedule_gtfs_dataset_key",
+            public_gtfs_dataset_keys = public_feeds
+        ).drop_duplicates(subset = list(op_profiles_df1.columns))
+        .reset_index(drop = True))
 
     op_profiles_df2.to_parquet(
         f"{RT_SCHED_GCS}{OPERATOR_PROFILE}.parquet"