From 40380e0161e65a8d3022b8150261f50eb266dfa6 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Mon, 9 Dec 2024 20:01:52 +0000 Subject: [PATCH 1/4] update crs refs in utility packages --- _shared_utils/shared_utils/rt_utils.py | 8 +++++--- _shared_utils/shared_utils/shared_data.py | 8 ++++---- .../bus_service_utils/create_parallel_corridors.py | 6 +++--- .../segment_speed_utils/parallel_corridors.py | 6 +++--- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/_shared_utils/shared_utils/rt_utils.py b/_shared_utils/shared_utils/rt_utils.py index d36348062..3f6cf4dba 100644 --- a/_shared_utils/shared_utils/rt_utils.py +++ b/_shared_utils/shared_utils/rt_utils.py @@ -402,7 +402,7 @@ def get_vehicle_positions(ix_df: pd.DataFrame) -> gpd.GeoDataFrame: vp_all = gpd.read_parquet(f"{VP_FILE_PATH}vp_{date_str}.parquet") org_vp = vp_all >> filter(_.gtfs_dataset_key.isin(ix_df.vehicle_positions_gtfs_dataset_key)) org_vp = org_vp >> select(-_.location_timestamp, -_.service_date, -_.activity_date) - org_vp = org_vp.to_crs(geography_utils.CA_NAD83Albers) + org_vp = org_vp.to_crs(geography_utils.CA_NAD83Albers_m) utils.geoparquet_gcs_export(org_vp, GCS_FILE_PATH + V2_SUBFOLDER, filename) return org_vp @@ -459,7 +459,9 @@ def get_stops(ix_df: pd.DataFrame) -> gpd.GeoDataFrame: org_stops = gpd.read_parquet(path) else: feed_key_list = list(ix_df.feed_key.unique()) - org_stops = gtfs_utils_v2.get_stops(service_date, feed_key_list, stop_cols, crs=geography_utils.CA_NAD83Albers) + org_stops = gtfs_utils_v2.get_stops( + service_date, feed_key_list, stop_cols, crs=geography_utils.CA_NAD83Albers_m + ) utils.geoparquet_gcs_export(org_stops, GCS_FILE_PATH + V2_SUBFOLDER, filename) return org_stops @@ -478,7 +480,7 @@ def get_shapes(ix_df: pd.DataFrame) -> gpd.GeoDataFrame: else: feed_key_list = list(ix_df.feed_key.unique()) org_shapes = gtfs_utils_v2.get_shapes( - service_date, feed_key_list, crs=geography_utils.CA_NAD83Albers, shape_cols=shape_cols + service_date, feed_key_list, crs=geography_utils.CA_NAD83Albers_m, shape_cols=shape_cols ) # invalid geos are nones in new df... org_shapes = org_shapes.dropna(subset=["geometry"]) diff --git a/_shared_utils/shared_utils/shared_data.py b/_shared_utils/shared_utils/shared_data.py index 7fa30cfcc..f6b70a90a 100644 --- a/_shared_utils/shared_utils/shared_data.py +++ b/_shared_utils/shared_utils/shared_data.py @@ -19,7 +19,7 @@ def make_county_centroids(): """ URL = "https://opendata.arcgis.com/datasets/" "8713ced9b78a4abb97dc130a691a8695_0.geojson" - gdf = gpd.read_file(URL).to_crs(geography_utils.CA_StatePlane) + gdf = gpd.read_file(URL).to_crs(geography_utils.CA_NAD83Albers_ft) gdf.columns = gdf.columns.str.lower() gdf = ( @@ -167,7 +167,7 @@ def segment_highway_lines_by_postmile(gdf: gpd.GeoDataFrame): # Assign segment geometry and overwrite the postmile geometry column gdf2 = ( - gdf.assign(geometry=gpd.GeoSeries(segment_geom, crs=geography_utils.CA_NAD83Albers)) + gdf.assign(geometry=gpd.GeoSeries(segment_geom, crs=geography_utils.CA_NAD83Albers_m)) .drop(columns=drop_cols) .set_geometry("geometry") ) @@ -205,7 +205,7 @@ def create_postmile_segments( .explode("geometry") .reset_index(drop=True) .pipe(round_odometer_values, ["bodometer", "eodometer"], num_decimals=3) - .to_crs(geography_utils.CA_NAD83Albers) + .to_crs(geography_utils.CA_NAD83Albers_m) ) # Have a list accompany the geometry @@ -222,7 +222,7 @@ def create_postmile_segments( f"{GCS_FILE_PATH}state_highway_network_postmiles.parquet", columns=group_cols + ["odometer", "geometry"] ) .pipe(round_odometer_values, ["odometer"], num_decimals=3) - .to_crs(geography_utils.CA_NAD83Albers) + .to_crs(geography_utils.CA_NAD83Albers_m) ) # Round to 3 digits for odometer. When there are more decimal places, it makes our cutoffs iffy # when we use this condition below: odometer >= bodometer & odometer <= eodometer diff --git a/bus_service_increase/bus_service_utils/create_parallel_corridors.py b/bus_service_increase/bus_service_utils/create_parallel_corridors.py index ecd0df139..c0d57c1b3 100644 --- a/bus_service_increase/bus_service_utils/create_parallel_corridors.py +++ b/bus_service_increase/bus_service_utils/create_parallel_corridors.py @@ -31,8 +31,8 @@ def process_transit_routes( ## Clean transit routes df = df.assign( route_length = df.to_crs( - geography_utils.CA_StatePlane).geometry.length - ).to_crs(geography_utils.CA_StatePlane) + geography_utils.CA_NAD83Albers_ft).geometry.length + ).to_crs(geography_utils.CA_NAD83Albers_ft) # Get it down to route_id and pick longest shape df2 = (df.sort_values(operator_cols + ["route_id", "route_length"], @@ -63,7 +63,7 @@ def prep_highway_directions_for_dissolve( ''' df = (gpd.read_parquet("gs://calitp-analytics-data/data-analyses/" "shared_data/state_highway_network.parquet") - .to_crs(geography_utils.CA_StatePlane)) + .to_crs(geography_utils.CA_NAD83Albers_ft)) # Get dummies for direction # Can make data wide instead of long diff --git a/rt_segment_speeds/segment_speed_utils/parallel_corridors.py b/rt_segment_speeds/segment_speed_utils/parallel_corridors.py index 6a4cad464..2839b734c 100644 --- a/rt_segment_speeds/segment_speed_utils/parallel_corridors.py +++ b/rt_segment_speeds/segment_speed_utils/parallel_corridors.py @@ -40,8 +40,8 @@ def process_transit_routes(analysis_date: str) -> gpd.GeoDataFrame: # Get this to same CRS as highways gdf = gdf.assign( - route_length_feet = gdf.geometry.to_crs(geography_utils.CA_StatePlane).length - ).drop(columns = "route_length").to_crs(geography_utils.CA_StatePlane) + route_length_feet = gdf.geometry.to_crs(geography_utils.CA_NAD83Albers_ft).length + ).drop(columns = "route_length").to_crs(geography_utils.CA_NAD83Albers_ft) return gdf @@ -65,7 +65,7 @@ def process_highways( direction_cols = ["NB", "SB", "EB", "WB"] df = (gpd.read_parquet(SHN_FILE) - .to_crs(geography_utils.CA_StatePlane) + .to_crs(geography_utils.CA_NAD83Albers_ft) ) # Get dummies for direction From 53c4c46611034f203103b6d84604a21c0eb2d7b4 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Mon, 9 Dec 2024 20:03:26 +0000 Subject: [PATCH 2/4] update geography_utils refs to CA_NAD83Albers_m or _ft --- bus_service_increase/create_analysis_data.py | 4 ++-- bus_service_increase/highways-existing-transit.ipynb | 2 +- la_metro_demo/A2_clean_up_gtfs.py | 2 +- la_metro_demo/A3_assemble_data.py | 2 +- py_crow_flies/py_crow_flies.ipynb | 2 +- rt_segment_speeds/11_tiger.ipynb | 6 +++--- thruway_bus_validators/A2_plot_amtrak_thruway.ipynb | 4 ++-- thruway_bus_validators/A4_local_routes_same_od.py | 4 ++-- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/bus_service_increase/create_analysis_data.py b/bus_service_increase/create_analysis_data.py index fd2495d21..fb993da85 100644 --- a/bus_service_increase/create_analysis_data.py +++ b/bus_service_increase/create_analysis_data.py @@ -134,7 +134,7 @@ def get_shapes(selected_date: str) -> gpd.GeoDataFrame: selected_date, columns = ["shape_array_key", "geometry"], get_pandas = True, - crs = geography_utils.CA_NAD83Albers + crs = geography_utils.CA_NAD83Albers_m ).pipe( helpers.remove_shapes_outside_ca ).merge( @@ -151,7 +151,7 @@ def get_shapes(selected_date: str) -> gpd.GeoDataFrame: def dissolve_census_tracts( - crs: str = geography_utils.CA_NAD83Albers + crs: str = geography_utils.CA_NAD83Albers_m ) -> gpd.GeoDataFrame: census_tracts = ( catalog.calenviroscreen_lehd_by_tract.read() diff --git a/bus_service_increase/highways-existing-transit.ipynb b/bus_service_increase/highways-existing-transit.ipynb index 131a577c5..c7d0a5844 100644 --- a/bus_service_increase/highways-existing-transit.ipynb +++ b/bus_service_increase/highways-existing-transit.ipynb @@ -78,7 +78,7 @@ "plot_df = gdf[\n", " gdf.route_length >= geography_utils.FEET_PER_MI * 0.5\n", " ].assign(\n", - " geometry = (gdf.geometry.to_crs(geography_utils.CA_StatePlane)\n", + " geometry = (gdf.geometry.to_crs(geography_utils.CA_NAD83Albers_ft)\n", " .buffer(300)\n", " .to_crs(geography_utils.WGS84)\n", " )\n", diff --git a/la_metro_demo/A2_clean_up_gtfs.py b/la_metro_demo/A2_clean_up_gtfs.py index 80a8355ab..8ba5bce71 100644 --- a/la_metro_demo/A2_clean_up_gtfs.py +++ b/la_metro_demo/A2_clean_up_gtfs.py @@ -9,7 +9,7 @@ # LA Metro data is for Oct 2022, so let's use the date we already downloaded analysis_date = rt_dates.DATES["oct2022"] -PROJECT_CRS = geography_utils.CA_NAD83Albers +PROJECT_CRS = geography_utils.CA_NAD83Albers_m def fill_missing_route_short_name(df: pd.DataFrame) -> pd.DataFrame: diff --git a/la_metro_demo/A3_assemble_data.py b/la_metro_demo/A3_assemble_data.py index f17682050..c2c82f8dd 100644 --- a/la_metro_demo/A3_assemble_data.py +++ b/la_metro_demo/A3_assemble_data.py @@ -10,7 +10,7 @@ import A2_clean_up_gtfs as clean_up_gtfs -PROJECT_CRS = geography_utils.CA_NAD83Albers +PROJECT_CRS = geography_utils.CA_NAD83Albers_m BUS_SERVICE_GCS = "gs://calitp-analytics-data/data-analyses/bus_service_increase/" diff --git a/py_crow_flies/py_crow_flies.ipynb b/py_crow_flies/py_crow_flies.ipynb index 51416fca3..22a728a0b 100644 --- a/py_crow_flies/py_crow_flies.ipynb +++ b/py_crow_flies/py_crow_flies.ipynb @@ -148,7 +148,7 @@ "outputs": [], "source": [ "# Transform the grid points to your preferred CRS\n", - "central = central.to_crs(shared_utils.geography_utils.CA_NAD83Albers).set_index('pointid')\n", + "central = central.to_crs(shared_utils.geography_utils.CA_NAD83Albers_m).set_index('pointid')\n", "central = central >> select(-_.Point_ID)" ] }, diff --git a/rt_segment_speeds/11_tiger.ipynb b/rt_segment_speeds/11_tiger.ipynb index dbc67ab87..488bff2ab 100644 --- a/rt_segment_speeds/11_tiger.ipynb +++ b/rt_segment_speeds/11_tiger.ipynb @@ -103,7 +103,7 @@ " f\"{SHARED_GCS}all_roads_2020_state06.parquet\",\n", " filters=[(\"MTFCC\", \"in\", road_type_wanted)],\n", " columns=[\"LINEARID\", \"geometry\", \"FULLNAME\"],\n", - " ).to_crs(geography_utils.CA_NAD83Albers)\n", + " ).to_crs(geography_utils.CA_NAD83Albers_m)\n", "\n", " # If a road has mutliple rows but the same\n", " # linear ID, dissolve it so it becomes one row.\n", @@ -238,7 +238,7 @@ " .drop_duplicates()\n", " )\n", "\n", - " stops = stops.set_crs(geography_utils.CA_NAD83Albers)\n", + " stops = stops.set_crs(geography_utils.CA_NAD83Albers_m)\n", "\n", " # Buffer each stop by 50 feet\n", " stops = stops.assign(buffered_geometry=stops.geometry.buffer(50))\n", @@ -287,7 +287,7 @@ " \"\"\"\n", " gtfs_shapes = helpers.import_scheduled_shapes(date).compute().drop_duplicates()\n", "\n", - " gtfs_shapes = gtfs_shapes.set_crs(geography_utils.CA_NAD83Albers)\n", + " gtfs_shapes = gtfs_shapes.set_crs(geography_utils.CA_NAD83Albers_m)\n", "\n", " trips = (\n", " helpers.import_scheduled_trips(date, (), [\"name\", \"shape_array_key\"])\n", diff --git a/thruway_bus_validators/A2_plot_amtrak_thruway.ipynb b/thruway_bus_validators/A2_plot_amtrak_thruway.ipynb index 4f0071ab1..fa3b80748 100644 --- a/thruway_bus_validators/A2_plot_amtrak_thruway.ipynb +++ b/thruway_bus_validators/A2_plot_amtrak_thruway.ipynb @@ -131,7 +131,7 @@ "outputs": [], "source": [ "gdf = gdf.assign(\n", - " route_mi = ((gdf.geometry.to_crs(geography_utils.CA_StatePlane)\n", + " route_mi = ((gdf.geometry.to_crs(geography_utils.CA_NAD83Albers_ft)\n", " .length).divide(geography_utils.FEET_PER_MI)).round(2)\n", ")" ] @@ -470,7 +470,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/thruway_bus_validators/A4_local_routes_same_od.py b/thruway_bus_validators/A4_local_routes_same_od.py index e35d9d911..8573cdf2b 100644 --- a/thruway_bus_validators/A4_local_routes_same_od.py +++ b/thruway_bus_validators/A4_local_routes_same_od.py @@ -20,7 +20,7 @@ def keep_long_shape_ids(routelines: dg.GeoDataFrame | gpd.GeoDataFrame, Filter down routelines file to just routes that are pretty long with shape_id. """ - routelines = routelines.to_crs(geography_utils.CA_StatePlane) + routelines = routelines.to_crs(geography_utils.CA_NAD83Albers_ft) routelines = routelines.assign( route_mi = routelines.geometry.length.divide( @@ -115,7 +115,7 @@ def buffer_around_origin_destination(gdf: gpd.GeoDataFrame, geom_cols = list(gdf.select_dtypes("geometry").columns) for c in geom_cols: - gdf[c] = gdf[c].to_crs(geography_utils.CA_StatePlane) + gdf[c] = gdf[c].to_crs(geography_utils.CA_NAD83Albers_ft) gdf = gdf.assign( origin_buffer = gdf.origin.buffer(buffer_feet), From 529ade65ea43f6206923b2e372d4e120e7a35519 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 18:37:17 +0000 Subject: [PATCH 3/4] do not include oct2024g in list --- _shared_utils/shared_utils/rt_dates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_shared_utils/shared_utils/rt_dates.py b/_shared_utils/shared_utils/rt_dates.py index b56873dd8..6d24157e2 100644 --- a/_shared_utils/shared_utils/rt_dates.py +++ b/_shared_utils/shared_utils/rt_dates.py @@ -80,7 +80,7 @@ v for k, v in DATES.items() if k.endswith("2023") and not any(substring in k for substring in ["jan", "feb"]) ] -y2024_dates = [v for k, v in DATES.items() if k.endswith("2024")] +y2024_dates = [v for k, v in DATES.items() if k.endswith("2024") and k not in ["oct2024g"]] valid_weeks = ["apr2023", "oct2023", "apr2024", "oct2024"] From 0b043f398eac7cd9bb4732c48aa2cdfb47df78d8 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Tue, 7 Jan 2025 22:38:56 +0000 Subject: [PATCH 4/4] add ca_transit_stops ref --- open_data/create_stops_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/open_data/create_stops_data.py b/open_data/create_stops_data.py index 199f4dd3e..56c3ac29a 100644 --- a/open_data/create_stops_data.py +++ b/open_data/create_stops_data.py @@ -57,9 +57,9 @@ def add_distance_to_state_highway( orig_crs = stops.crs shn = catalog.state_highway_network.read()[ - ["geometry"]].to_crs(geography_utils.CA_NAD83Albers).geometry.iloc[0] + ["geometry"]].to_crs(geography_utils.CA_NAD83Albers_m).geometry.iloc[0] - stops = stops.to_crs(geography_utils.CA_NAD83Albers) + stops = stops.to_crs(geography_utils.CA_NAD83Albers_m) stops = stops.assign( meters_to_shn = stops.geometry.distance(shn).round(1)