diff --git a/bus_procurement_cost/FTA_bus_grant_analysis.ipynb b/bus_procurement_cost/FTA_bus_grant_analysis.ipynb
deleted file mode 100644
index d99820354..000000000
--- a/bus_procurement_cost/FTA_bus_grant_analysis.ipynb
+++ /dev/null
@@ -1,1281 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "46b3c4b1-8668-49b7-a330-9bd1f092f7b9",
- "metadata": {
- "tags": []
- },
- "source": [
- "## FY23 FTA Bus and Low- and No-Emission Grant Awards Analysis\n",
- "\n",
- "GH issue: \n",
- "* Research Request - Bus Procurement Costs & Awards #897\n",
- "\n",
- "Data source(s): \n",
- "1. https://www.transit.dot.gov/funding/grants/fy23-fta-bus-and-low-and-no-emission-grant-awards\n",
- "2. https://storymaps.arcgis.com/stories/022abf31cedd438b808ec2b827b6faff\n",
- "\n",
- "Definitions: \n",
- "* Grants for Buses and Bus Facilities Program:\n",
- " * 49 U.S.C. 5339(b) makes federal resources available to states and direct recipients to replace, rehabilitate and purchase buses and related equipment and to construct bus-related facilities, including technological changes or innovations to modify low or no emission vehicles or facilities. Funding is provided through formula allocations and competitive grants. \n",
- "\n",
- "* Low or No Emission Vehicle Program:\n",
- " * 5339(c) provides funding to state and local governmental authorities for the purchase or lease of zero-emission and low-emission transit buses as well as acquisition, construction, and leasing of required supporting facilities.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "1ad111b5-3933-45c4-aa3c-f12ea701e882",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import pandas as pd\n",
- "import shared_utils\n",
- "\n",
- "# set_option to increase max rows displayed to 300, to see the entire df in one go\n",
- "pd.set_option(\"display.max_rows\", 300)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "467fc38e-6313-44c7-9797-456ecd752e57",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Reading in raw data from gcs"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "a42530b9-806e-498b-8f82-9f123ddb087c",
- "metadata": {},
- "outputs": [],
- "source": [
- "gcs_path = \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/\"\n",
- "file = \"data-analyses_bus_procurement_cost_fta_press_release_data_csv.csv\"\n",
- "\n",
- "fta = pd.read_csv(gcs_path+file)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7f37c5d6-664c-48fd-9c32-cf7dcd3318ae",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Data Cleaning\n",
- "1. snake-case column names\n",
- "2. remove currency formatting from funding column (with $ and , )\n",
- "3. separate text from # of bus col (split at '(')\n",
- " a. trim spaces in new col\n",
- " b. get rid of () characters in new col\n",
- "4. trim spaces in other columns\n",
- "5. examine column values and replace/update as needed\n",
- "6. create new columns for bus size type and prop type\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "cb70936a-8d54-4ae5-b9cc-ef64ea04c8b5",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Dataframe cleaning"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "7efb6ebf-474a-4c58-8052-a7427e881649",
- "metadata": {},
- "outputs": [],
- "source": [
- "def snake_case(df):\n",
- " '''\n",
- " snake case dataframe columns and strip extra spaces\n",
- " '''\n",
- " df.columns = df.columns.str.lower().str.replace(\" \", \"_\").str.strip()\n",
- "\n",
- "\n",
- "def fund_cleaner(df, column):\n",
- " '''\n",
- " function to clean the funding column and make column int64\n",
- " '''\n",
- " df[column] = df[column].str.replace(\"$\", \"\").str.replace(\",\", \"\").str.strip().astype('int64')\n",
- "\n",
- " \n",
- "\n",
- "def value_replacer(df, col1, col1_val, col2, col2_new_val):\n",
- " '''\n",
- " function that replaces the value at a specific row on a specific column.\n",
- " in this case, filters the df by a specific col/val, then replaces the value at new col/val\n",
- " '''\n",
- " df.loc[df[col1] == col1_val , col2] = col2_new_val\n",
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "f55008ae-1de5-4cd3-89f2-87cd4eac17cd",
- "metadata": {},
- "outputs": [],
- "source": [
- "# snake case function to Df\n",
- "snake_case(fta)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "d012306e-86ff-4351-86b3-c3a8dc3145fd",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Column Cleaning"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "6fb7a5e0-6649-468b-9f0e-4b36281e0db0",
- "metadata": {
- "tags": []
- },
- "source": [
- "#### rename `propulsion_type` to `propulsion_category`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "9e22e6fa-857d-44c3-beec-0f83f71a6b1c",
- "metadata": {},
- "outputs": [],
- "source": [
- "# rename col to propulsion category\n",
- "fta = fta.rename(columns={\"propulsion_type\": \"propulsion_category\"})\n",
- "\n",
- "# make values in prop_cat col lower case and remove spaces\n",
- "fta[\"propulsion_category\"] = fta[\"propulsion_category\"].str.lower()\n",
- "fta[\"propulsion_category\"] = fta[\"propulsion_category\"].str.replace(\" \", \"\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "354cb76f-f71f-4a30-8c85-aece5ac3f0d3",
- "metadata": {
- "tags": []
- },
- "source": [
- "#### funding"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "ff8721be-5cbd-430f-b947-4110c397de23",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_773/192928216.py:12: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
- " df[column] = df[column].str.replace(\"$\", \"\").str.replace(\",\", \"\").str.strip().astype('int64')\n"
- ]
- }
- ],
- "source": [
- "fund_cleaner(fta, \"funding\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7077380f-1f92-4108-bac1-77db8f79568d",
- "metadata": {
- "tags": []
- },
- "source": [
- "#### split `approx_#_of_buses` to `bus_count` and `prop_type`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "1fb94754-e795-4e78-8a91-2732566a1792",
- "metadata": {},
- "outputs": [],
- "source": [
- "# remove the spaces in the # of buses column values first, THEN split by (\n",
- "fta[\"approx_#_of_buses\"] = fta[\"approx_#_of_buses\"].str.replace(\" \", \"\")\n",
- "\n",
- "# splitting the # of buses column into 2, using the ( char as the delimiter\n",
- "# also fills `none` values with `needs manual check`\n",
- "fta[[\"bus_count\", \"prop_type\"]] = fta[\"approx_#_of_buses\"].str.split(\n",
- " pat=\"(\", n=1, expand=True\n",
- ")\n",
- "fta[[\"bus_count\", \"prop_type\"]] = fta[[\"bus_count\", \"prop_type\"]].fillna(\n",
- " \"needs manual check\"\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7a6e2c90-9da1-4cfb-8032-397baa74579a",
- "metadata": {
- "tags": []
- },
- "source": [
- "#### bus_count"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "c979c629-1a2f-4804-9f16-cdd27092f964",
- "metadata": {},
- "outputs": [],
- "source": [
- "# running function on rows that need specific value changes\n",
- "value_replacer(fta,'bus_count','56estimated-cutawayvans', 'bus_count', 56)\n",
- "value_replacer(fta,'bus_count','12batteryelectric','bus_count', 12)\n",
- "value_replacer(fta,'prop_type','PM-awardwillnotfund68buses)', 'prop_type', 'estimated-cutaway vans (PM- award will not fund 68 buses)')\n",
- "value_replacer(fta,'project_sponsor','City of Charlotte - Charlotte Area Transit System','bus_count',31)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "ef56c2ba-a2db-46c4-82ac-c8d95b6e11ad",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "dtype('int64')"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# cast bus_count to int64\n",
- "fta['bus_count'] = fta['bus_count'].astype('int64')\n",
- "\n",
- "#check work\n",
- "fta['bus_count'].dtype"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "58fd7f90-2935-4e58-92c6-4253758ad3c1",
- "metadata": {
- "tags": []
- },
- "source": [
- "#### project_type"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "ca77c135-1168-489e-802e-402c614bbb77",
- "metadata": {},
- "outputs": [],
- "source": [
- "# using str.lower() on project type\n",
- "fta[\"project_type\"] = fta[\"project_type\"].str.lower().str.replace(\" \", \"\")\n",
- "# using str.lower() on project type\n",
- "# fta[\"project_type\"] = fta[\"project_type\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "2014394c-33d9-44e9-b162-ab21988d9e8d",
- "metadata": {},
- "outputs": [],
- "source": [
- "# some values still need to get adjusted. will use a short dictionary to fix\n",
- "new_type = {\n",
- " \"\\tbus/facility\": \"bus/facility\",\n",
- " \"bus/facilitiy\": \"bus/facility\",\n",
- " \"facilities\": \"facility\",\n",
- "}\n",
- "# using replace() with the dictionary to replace keys in project type col\n",
- "fta.replace({\"project_type\": new_type}, inplace=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "dd2125a9-1117-4baa-b704-1aad30249e6d",
- "metadata": {
- "tags": []
- },
- "source": [
- "#### `prop_type`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "09750ef1-1eba-4561-b5ad-b05f2a3b5875",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_773/4065559258.py:3: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
- " fta[\"prop_type\"] = fta[\"prop_type\"].str.replace(\")\", \"\").str.strip()\n"
- ]
- }
- ],
- "source": [
- "# cleaning the bus desc/prop_type col.\n",
- "# removing the )\n",
- "fta[\"prop_type\"] = fta[\"prop_type\"].str.replace(\")\", \"\").str.strip()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "bba62d67-35ab-4334-898b-f7581b40c574",
- "metadata": {},
- "outputs": [],
- "source": [
- "# creating a dictionary to add spaces back to the values\n",
- "spaces = {\n",
- " \"beb\": \"BEB\",\n",
- " \"estimated-CNGbuses\": \"estimated-CNG buses\",\n",
- " \"cngbuses\": \"CNG buses\",\n",
- " \"BEBs\": \"BEB\",\n",
- " \"Electric\\n16(Hybrid\": \"15 electic, 16 hybrid\",\n",
- " \"FuelCellElectric\": \"fuel cell electric\",\n",
- " \"FuelCell\": \"fuel cell\",\n",
- " \"lowemissionCNG\": \"low emission CNG\",\n",
- " \"cng\": \"CNG\",\n",
- " \"BEBsparatransitbuses\": \"BEBs paratransit buses\",\n",
- " \"hybridelectric\": \"hybrid electric\",\n",
- " \"zeroemissionbuses\": \"zero emission buses\",\n",
- " \"dieselelectrichybrids\": \"diesel electric hybrids\",\n",
- " \"hydrogenfuelcell\": \"hydrogen fuel cell\",\n",
- " \"2BEBsand4HydrogenFuelCellBuses\": \"2 BEBs and 4 hydrogen fuel cell buses\",\n",
- " \"4fuelcell/3CNG\": \"4 fuel cell / 3 CNG\",\n",
- " \"hybridelectricbuses\": \"hybrid electric buses\",\n",
- " \"CNGfueled\": \"CNG fueled\",\n",
- " \"zeroemissionelectric\": \"zero emission electric\",\n",
- " \"hybridelectrics\": \"hybrid electrics\",\n",
- " \"dieselandgas\": \"diesel and gas\",\n",
- " \"diesel-electrichybrids\": \"diesel-electric hybrids\",\n",
- " \"propanebuses\": \"propane buses\",\n",
- " \"1:CNGbus;2cutawayCNGbuses\": \"1:CNGbus ;2 cutaway CNG buses\",\n",
- " \"zeroemission\": \"zero emission\",\n",
- " \"propanedpoweredvehicles\": \"propaned powered vehicles\",\n",
- "}\n",
- "\n",
- "# using new dictionary to replace values in the bus desc col\n",
- "fta.replace({\"prop_type\": spaces}, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "id": "9bd26f13-146d-43cc-9a23-23032ac2b173",
- "metadata": {},
- "outputs": [],
- "source": [
- "# dict to validate prop_type values\n",
- "prop_type_dict = {\n",
- " \"15 electic, 16 hybrid\": \"mix (zero and low emission buses)\",\n",
- " \"1:CNGbus ;2 cutaway CNG buses\": \"mix (zero and low emission buses)\",\n",
- " \"2 BEBs and 4 hydrogen fuel cell buses\": \"mix (BEB and FCEB)\",\n",
- " \"4 fuel cell / 3 CNG\": \"mix (zero and low emission buses)\",\n",
- " \"BEBs paratransit buses\": \"BEB\",\n",
- " \"CNG buses\": \"CNG\",\n",
- " \"CNG fueled\": \"CNG\",\n",
- " \"Electric\": \"electric (not specified)\",\n",
- " \"battery electric\": \"BEB\",\n",
- " \"diesel and gas\": \"mix (low emission)\",\n",
- " \"diesel electric hybrids\": \"low emission (hybrid)\",\n",
- " \"diesel-electric\": \"low emission (hybrid)\",\n",
- " \"diesel-electric hybrids\": \"low emission (hybrid)\",\n",
- " \"electric\": \"electric (not specified)\",\n",
- " \"estimated-CNG buses\": \"CNG\",\n",
- " \"estimated-cutaway vans (PM- award will not fund 68 buses\": \"mix (zero and low emission buses)\",\n",
- " \"fuel cell\": \"FCEB\",\n",
- " \"fuel cell electric\": \"FCEB\",\n",
- " \"hybrid\": \"low emission (hybrid)\",\n",
- " \"hybrid electric\": \"low emission (hybrid)\",\n",
- " \"hybrid electric buses\": \"low emission (hybrid)\",\n",
- " \"hybrid electrics\": \"low emission (hybrid)\",\n",
- " \"hydrogen fuel cell\": \"FCEB\",\n",
- " \"low emission CNG\": \"CNG\",\n",
- " \"propane\": \"low emission (propane)\",\n",
- " \"propane buses\": \"low emission (propane)\",\n",
- " \"propaned powered vehicles\": \"low emission (propane)\",\n",
- " \"zero emission\": \"zero-emission bus (not specified)\",\n",
- " \"zero emission buses\": \"zero-emission bus (not specified)\",\n",
- " \"zero emission electric\": \"zero-emission bus (not specified)\",\n",
- " \"zero-emission\": \"zero-emission bus (not specified)\",\n",
- "}\n",
- "\n",
- "# replacing values in prop type with prop type dictionary\n",
- "fta.replace({\"prop_type\": prop_type_dict}, inplace=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "dc3aacbb-0dcb-4bb0-9ea7-6c2fe1d75b49",
- "metadata": {},
- "source": [
- "### fix `prop_type == needs manual check`\n",
- "\n",
- "- subset a df of only prop type == needs manual check\n",
- "- create list of keywords to check prop type\n",
- "- create function to replace `needs manual check` values with list values\n",
- "- then... do something with both dataframes? \n",
- " * remove rows with `needs manual check`\n",
- " * then append subset df to initial df?\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "580e9c87-9aae-4221-bc29-251e4e1469be",
- "metadata": {},
- "outputs": [],
- "source": [
- "# subdf of just `needs manual check` prop_types\n",
- "manual_check = fta[fta[\"prop_type\"] == \"needs manual check\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "9ea7203e-c44f-4b02-a6da-31e7dea885c9",
- "metadata": {},
- "outputs": [],
- "source": [
- "# function to match keywords to list\n",
- "def prop_type_finder(description):\n",
- " for keyword in manual_checker_list:\n",
- " if keyword in description:\n",
- " return keyword\n",
- " return \"no bus procurement\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "3e66c103-bec0-4925-9a71-b446f6931c33",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_773/3261224827.py:40: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " manual_check[\"prop_type\"] = manual_check[\"description\"].apply(prop_type_finder)\n"
- ]
- }
- ],
- "source": [
- "manual_checker_list = [\n",
- " \"propane-powered\",\n",
- " \"hybrid diesel-electric buses\",\n",
- " \"propane fueled buses\",\n",
- " \"cutaway vehicles\",\n",
- " \"diesel-electric hybrid\",\n",
- " \"low or no emission buses\",\n",
- " \"electric buses\",\n",
- " \"hybrid-electric vehicles\",\n",
- " \"electric commuter\",\n",
- " \"Electric Buses\",\n",
- " \"battery electric\",\n",
- " \"Batery Electric\",\n",
- " \"battery-electric\",\n",
- " \"fuel-cell\",\n",
- " \"fuel cell\",\n",
- " \"Fuel Cell\",\n",
- " \"zero emission\",\n",
- " \"Zero Emission\",\n",
- " \"zero-emission electric buses\",\n",
- " \"zero-emission buses\",\n",
- " \"zero‐emission\",\n",
- " \"zero-emission\",\n",
- " \"zeroemission\",\n",
- " \"CNG\",\n",
- " \"cng\",\n",
- " \"County Mass Transit District will receive funding to buy buses\",\n",
- " \"Colorado will receive funding to buy vans to replace older ones\",\n",
- " \"ethanol-fueled buses\",\n",
- " \"will receive funding to buy vans to replace\",\n",
- " \"funding to replace the oldest buses\",\n",
- " \"to buy buses and charging equipment\",\n",
- " \"counties by buying buses\",\n",
- " \"receive funding to buy cutaway paratransit buses\",\n",
- " \"new replacement vehicles\",\n",
- "]\n",
- "\n",
- "# overwrites the 'prop_type' column by applying the function to the description column. \n",
- "# the function checks each description against the keyword list, then returns the first keyword the row matched\n",
- "manual_check[\"prop_type\"] = manual_check[\"description\"].apply(prop_type_finder)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "eb4126ed-30a6-4d36-9aa4-4edcdd3d3606",
- "metadata": {},
- "source": [
- "### use dictionary to change manual_check prop_type values to match validated values"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "id": "347a856d-84d4-420e-b9ca-5a57c784a9ec",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_773/3041575545.py:25: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " manual_check.replace({\"prop_type\": manual_check_dict}, inplace=True)\n"
- ]
- }
- ],
- "source": [
- "manual_check_dict= {'zero emission': 'zero-emission bus (not specified)',\n",
- " 'electric buses':'electric (not specified)',\n",
- " 'zero-emission': 'zero-emission bus (not specified)',\n",
- " 'low or no emission buses' : 'mix (zero and low emission buses)',\n",
- " 'zero-emission buses': 'zero-emission bus (not specified)',\n",
- " 'new replacement vehicles':'not specified',\n",
- " 'receive funding to buy cutaway paratransit buses': 'not specified',\n",
- " 'counties by buying buses': 'not specified',\n",
- " 'battery-electric' : 'BEB',\n",
- " 'to buy buses and charging equipment':'not specified',\n",
- " 'propane-powered': 'low emission (propane)',\n",
- " 'funding to replace the oldest buses':'not specified',\n",
- " 'diesel-electric hybrid': 'low emission (hybrid)',\n",
- " 'hybrid diesel-electric buses': 'low emission (hybrid)',\n",
- " 'cutaway vehicles':'not specified',\n",
- " 'propane fueled buses': 'low emission (propane)',\n",
- " 'County Mass Transit District will receive funding to buy buses':'not specified',\n",
- " 'ethanol-fueled buses': 'low emission (ethanol)',\n",
- " 'will receive funding to buy vans to replace': 'not specified',\n",
- " 'Colorado will receive funding to buy vans to replace older ones': 'not specified',\n",
- " 'hybrid-electric vehicles': 'low emission (hybrid)'\n",
- "}\n",
- "\n",
- "# replace prop_type values using manual_check_dict\n",
- "manual_check.replace({\"prop_type\": manual_check_dict}, inplace=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "d0ab5f47-a5b9-42a4-8c8d-c7c7be793ad8",
- "metadata": {},
- "source": [
- "### deleting rows from initial df that have prop_type == 'needs manual check'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "id": "cf13bebf-ae89-477b-9ee7-f9a5d8d922e9",
- "metadata": {},
- "outputs": [],
- "source": [
- "# filters df for rows that do not equal `needs manual check`\n",
- "# expect rows to drop from 130 to 72?\n",
- "fta = fta[fta['prop_type'] != 'needs manual check']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "id": "11fdb527-0c74-4c4b-a9dd-54679e8cb744",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_773/3394426208.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
- " fta = fta.append(manual_check, ignore_index=True)\n"
- ]
- }
- ],
- "source": [
- "### appending rows from manual_check to initial df\n",
- "fta = fta.append(manual_check, ignore_index=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "8a498a0b-4552-4c38-90cd-6e1c4045d40d",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Need new column for `bus size type` via list and function\n",
- "cutaway, 40ft etc"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "id": "6053159a-62b1-4866-a13e-47fabf4576b4",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_size = [\n",
- " \"standard\",\n",
- " \"40 foot\",\n",
- " \"40-foot\",\n",
- " \"40ft\",\n",
- " \"articulated\",\n",
- " \"cutaway\",\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "id": "3500afae-6358-435f-9d25-1854b9f16634",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Function to match keywords\n",
- "def find_bus_size_type(description):\n",
- " for keyword in bus_size:\n",
- " if keyword in description.lower():\n",
- " return keyword\n",
- " return \"not specified\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "id": "b7a0ae00-3a85-4e95-ae94-7ffeb55c3a8d",
- "metadata": {},
- "outputs": [],
- "source": [
- "# new column called bus size type based on description column\n",
- "fta[\"bus_size_type\"] = fta[\"description\"].apply(find_bus_size_type)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "id": "75ec2282-9c0b-4640-9741-f3363c233b74",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([100, 90, 20, 40, 35, 16, 30, 31, 23, 7, 25, 13, 4,\n",
- " 17, 39, 12, 37, 14, 50, 8, 6, 11, 56, 10, 9, 5,\n",
- " 15, 2, 3, 1, 0, 69, 18, 160, 134, 42])"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# inspect unique bus_count values\n",
- "fta['bus_count'].unique()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "9a83f537-ccff-44c2-8b41-82b2c88b9ccd",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Exporting cleaned data to GCS"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "id": "609a3659-8bf2-4412-aabc-d6ea956c3bbe",
- "metadata": {},
- "outputs": [],
- "source": [
- "# saving to GCS as parquet\n",
- "\n",
- "clean_file = 'fta_bus_cost_clean.parquet'\n",
- "\n",
- "fta.to_parquet(gcs_path+clean_file)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "8a04ee87-fba7-46df-ac67-44956fa82c7c",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Reading in cleaned data from GCS"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "0886453f-e9d5-46d8-9987-3365137137b2",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_cost = pd.read_csv(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_bus_cost_clean.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8a4e9841-79f5-41e1-9c53-3def3d802bd9",
- "metadata": {},
- "outputs": [],
- "source": [
- "# confirming cleaned data shows as expected.\n",
- "display(bus_cost.shape, type(bus_cost), bus_cost.columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f95f7ae2-2ddf-40b7-aa76-af00e83854d1",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_cost[\"prop_type\"].sort_values(ascending=True).unique()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1ede25fd-850a-4be6-bac0-a1bffa05b776",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "## DEPRECATED - Data Analysis\n",
- "actual data analysis and summary stats exist in the `cost_per_bus_analysis.ipynb` notebook"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "42ca0dfd-2ea1-4194-b431-0e4853d21879",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Cost per Bus, per Transit Agency dataframe"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "26485b67-dc48-4102-a835-96460649d8ff",
- "metadata": {},
- "outputs": [],
- "source": [
- "only_bus = bus_cost[bus_cost[\"bus_count\"] > 0]\n",
- "only_bus.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a8f05625-7da1-4aea-ac71-c177d02ca008",
- "metadata": {},
- "outputs": [],
- "source": [
- "cost_per_bus = (\n",
- " only_bus.groupby(\"project_sponsor\")\n",
- " .agg({\"funding\": \"sum\", \"bus_count\": \"sum\"})\n",
- " .reset_index()\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c3ab90a5-5e7f-4ab6-8ecc-a7598e185f5b",
- "metadata": {},
- "outputs": [],
- "source": [
- "cost_per_bus[\"cost_per_bus\"] = (\n",
- " cost_per_bus[\"funding\"] / cost_per_bus[\"bus_count\"]\n",
- ").astype(\"int64\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ce71b957-ea77-40c8-95de-d6f160462f58",
- "metadata": {},
- "outputs": [],
- "source": [
- "cost_per_bus.dtypes"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b6f9ca72-4977-4536-9e6b-828af63c23b1",
- "metadata": {},
- "outputs": [],
- "source": [
- "cost_per_bus"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f02f391a-920c-458a-95e0-fd7875e6a653",
- "metadata": {},
- "outputs": [],
- "source": [
- "## export cost_per_bus df to gcs\n",
- "cost_per_bus.to_csv(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_cost_per_bus.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "79e18bf0-3dff-4263-9b47-8f2e4ee3d2e8",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Cost per bus, stats analysis"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2ec98703-78f8-4c1c-b5c3-5a4834b1b44f",
- "metadata": {},
- "outputs": [],
- "source": [
- "# read in fta cost per bus csv\n",
- "cost_per_bus = pd.read_csv(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_cost_per_bus.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6aebe6c0-eeb6-4da3-88ce-79f1fe7f5f6f",
- "metadata": {},
- "outputs": [],
- "source": [
- "display(cost_per_bus.shape, cost_per_bus.head())"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "299d75c2-2f1a-4a7d-a1ee-87271a1f9e4b",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Initial Summary Stats"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c4f9c488-8306-4eb3-bac0-40c75ac1dfed",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Summary Stats"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "76e7e2f6-47c8-4423-a9b3-2f02ee06fb9f",
- "metadata": {},
- "outputs": [],
- "source": [
- "# top level analysis\n",
- "\n",
- "bus_cost.agg({\"project_title\": \"count\", \"funding\": \"sum\", \"bus_count\": \"sum\"})"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a1c0bdc5-9b63-40bd-94b7-2aa04b9bc70a",
- "metadata": {},
- "outputs": [],
- "source": [
- "# start of agg. by project_type\n",
- "\n",
- "bus_cost.groupby(\"project_type\").agg(\n",
- " {\"project_type\": \"count\", \"funding\": \"sum\", \"bus_count\": \"sum\"}\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f036cf52-fcc7-4f43-b11a-79a75515efc3",
- "metadata": {},
- "outputs": [],
- "source": [
- "# agg by program\n",
- "\n",
- "bus_cost.groupby(\"bus/low-no_program\").agg(\n",
- " {\"project_type\": \"count\", \"funding\": \"sum\", \"bus_count\": \"sum\"}\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "228da8c2-9b22-4d1c-9259-4417e9374309",
- "metadata": {},
- "outputs": [],
- "source": [
- "# agg by state, by funding\n",
- "bus_cost.groupby(\"state\").agg(\n",
- " {\"project_type\": \"count\", \"funding\": \"sum\", \"bus_count\": \"sum\"}\n",
- ").sort_values(by=\"funding\", ascending=False)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "e85e540e-0396-49b0-9f2e-64e5236e63e8",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Projects with bus purchases"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8bad1e5d-9d05-49ef-ab21-81b98d80ef75",
- "metadata": {},
- "outputs": [],
- "source": [
- "# df of only projects with a bus count\n",
- "only_bus = bus_cost[bus_cost[\"bus_count\"] > 0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6c5013e3-ed56-4ea6-b191-af9a8e797b4b",
- "metadata": {},
- "outputs": [],
- "source": [
- "display(only_bus.shape, only_bus.columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e93b7fe9-49e8-475d-aa89-c085f6792978",
- "metadata": {},
- "outputs": [],
- "source": [
- "# agg by propulsion type\n",
- "only_bus[\"propulsion_type\"].value_counts()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "dc7cc8c9-f81a-43c1-bb62-e8e2467358ce",
- "metadata": {},
- "outputs": [],
- "source": [
- "only_bus.project_type.value_counts()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d86ee60d-e3e8-4dea-880f-731f6ac64376",
- "metadata": {},
- "outputs": [],
- "source": [
- "# of the rows with bus_count > 0, what are the project types?\n",
- "bus_agg = only_bus.groupby(\"project_type\").agg(\n",
- " {\"project_type\": \"count\", \"funding\": \"sum\", \"bus_count\": \"sum\"}\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f7afb2df-374e-4ee2-b99c-6959d2111a37",
- "metadata": {},
- "outputs": [],
- "source": [
- "# new column that calculates `cost per bus`\n",
- "bus_agg[\"cost_per_bus\"] = (bus_agg[\"funding\"] / bus_agg[\"bus_count\"]).astype(\"int64\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f8b1c63c-9763-4d24-9613-433aefd8e4bf",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_agg"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "211cb7b6-8fb1-4d52-890a-7106afb981a0",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Projects with no buses"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e018dd24-4c96-4d1b-ad48-d67d706d165b",
- "metadata": {},
- "outputs": [],
- "source": [
- "no_bus = bus_cost[bus_cost[\"bus_count\"] < 1]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c9ce24e3-5366-4cb5-9f92-4d24b50493e4",
- "metadata": {},
- "outputs": [],
- "source": [
- "no_bus[\"project_type\"].value_counts()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "ac02fbbb-2a88-486f-8001-fd8156c50bfb",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Overall Summary"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6ad1b9f5-a5c7-4fee-abb2-cb74dee4ab40",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "project_count = bus_cost.project_title.count()\n",
- "fund_sum = bus_cost.funding.sum()\n",
- "bus_count_sum = bus_cost.bus_count.sum()\n",
- "overall_cost_per_bus = (fund_sum) / (bus_count_sum)\n",
- "bus_program_count = bus_cost[\"bus/low-no_program\"].value_counts()\n",
- "\n",
- "projects_with_bus = only_bus.project_title.count()\n",
- "projects_with_bus_funds = only_bus.funding.sum()\n",
- "cost_per_bus = (only_bus.funding.sum()) / (bus_count_sum)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "7e932b1e-f5e5-4215-91b5-4c922e78062a",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "summary = f\"\"\"\n",
- "Top Level observation:\n",
- "- {project_count} projects awarded\n",
- "- ${fund_sum:,.2f} dollars awarded\n",
- "- {bus_count_sum} buses to be purchased\n",
- "- ${overall_cost_per_bus:,.2f} overall cost per bus\n",
- "\n",
- "Projects have some mix of buses, facilities and equipment. Making it difficult to disaggregate actual bus cost.\n",
- "\n",
- "Of the {project_count} projects awarded, {projects_with_bus} projects inlcuded buses. The remainder were facilities, chargers and equipment\n",
- "\n",
- "Projects with buses purchases:\n",
- "- {projects_with_bus} projects\n",
- "- ${projects_with_bus_funds:,.2f} awarded to purchases buses\n",
- "- ${cost_per_bus:,.2f} cost per bus\n",
- "\"\"\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "dfb95588-9083-4298-8822-460c2dad9941",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "print(summary)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2429adb2-fdfe-4a0f-a7c5-8ed2035f6fad",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Assuming your DataFrame is named df\n",
- "cost_per_bus_values = cost_per_bus[\"cost_per_bus\"]\n",
- "\n",
- "# Calculate mean and standard deviation\n",
- "mean_value = cost_per_bus_values.mean()\n",
- "std_deviation = cost_per_bus_values.std()\n",
- "\n",
- "# Plot histogram\n",
- "plt.hist(cost_per_bus_values, bins=30, color=\"skyblue\", edgecolor=\"black\", alpha=0.7)\n",
- "\n",
- "# Add vertical lines for mean and standard deviation\n",
- "plt.axvline(mean_value, color=\"red\", linestyle=\"dashed\", linewidth=2, label=\"Mean\")\n",
- "plt.axvline(\n",
- " mean_value + std_deviation,\n",
- " color=\"green\",\n",
- " linestyle=\"dashed\",\n",
- " linewidth=2,\n",
- " label=\"Mean + 1 Std Dev\",\n",
- ")\n",
- "plt.axvline(\n",
- " mean_value - std_deviation,\n",
- " color=\"green\",\n",
- " linestyle=\"dashed\",\n",
- " linewidth=2,\n",
- " label=\"Mean - 1 Std Dev\",\n",
- ")\n",
- "\n",
- "# Set labels and title\n",
- "plt.xlabel(\"cost_per_bus\")\n",
- "plt.ylabel(\"Frequency\")\n",
- "plt.title(\"Histogram of cost_per_bus with Mean and Std Dev Lines\")\n",
- "plt.legend()\n",
- "\n",
- "# Show the plot\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c3b5a48f-804e-4525-98fc-39d3fa845f6f",
- "metadata": {},
- "outputs": [],
- "source": [
- "import matplotlib.pyplot as plt\n",
- "import matplotlib.ticker as mticker\n",
- "import pandas as pd\n",
- "\n",
- "# Assuming your DataFrame is named df\n",
- "cost_per_bus_values = cost_per_bus[\"cost_per_bus\"]\n",
- "\n",
- "# Calculate mean and standard deviation\n",
- "mean_value = cost_per_bus_values.mean()\n",
- "std_deviation = cost_per_bus_values.std()\n",
- "\n",
- "# Plot histogram\n",
- "plt.hist(cost_per_bus_values, bins=20, color=\"skyblue\", edgecolor=\"black\", alpha=0.7)\n",
- "\n",
- "# Add vertical lines for mean and standard deviation\n",
- "plt.axvline(mean_value, color=\"red\", linestyle=\"dashed\", linewidth=2, label=\"Mean\")\n",
- "plt.axvline(\n",
- " mean_value + std_deviation,\n",
- " color=\"green\",\n",
- " linestyle=\"dashed\",\n",
- " linewidth=2,\n",
- " label=\"Mean + 1 Std Dev\",\n",
- ")\n",
- "plt.axvline(\n",
- " mean_value - std_deviation,\n",
- " color=\"green\",\n",
- " linestyle=\"dashed\",\n",
- " linewidth=2,\n",
- " label=\"Mean - 1 Std Dev\",\n",
- ")\n",
- "\n",
- "# Set labels and title\n",
- "plt.xlabel(\"Cost per Bus (USD)\")\n",
- "plt.ylabel(\"Frequency\")\n",
- "plt.title(\"Histogram of Cost per Bus with Mean and Std Dev Lines\")\n",
- "plt.legend()\n",
- "\n",
- "# Format x-axis ticks as USD\n",
- "plt.gca().xaxis.set_major_formatter(mticker.StrMethodFormatter(\"${x:,.0f}\"))\n",
- "\n",
- "# Show the plot\n",
- "plt.show()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.13"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/bus_procurement_cost/Makefile b/bus_procurement_cost/Makefile
index 494e97e6b..23cf662b3 100644
--- a/bus_procurement_cost/Makefile
+++ b/bus_procurement_cost/Makefile
@@ -4,6 +4,5 @@ all_bus_scripts:
python tircp_data_cleaner.py
python dgs_data_cleaner.py
python cost_per_bus_cleaner.py
- python cost_per_bus_utils.py
jupyter nbconvert --to notebook --execute --inplace cost_per_bus_analysis.ipynb
- jupyter nbconvert --to html --no-input --no-prompt cost_per_bus_analysis.ipynb
\ No newline at end of file
+ jupyter nbconvert --to html --no-input --no-prompt cost_per_bus_analysis.ipynb
diff --git a/bus_procurement_cost/README.md b/bus_procurement_cost/README.md
index 3969a34d9..1953c0f6b 100644
--- a/bus_procurement_cost/README.md
+++ b/bus_procurement_cost/README.md
@@ -7,10 +7,11 @@
* (upcoming )Washington and/or Georgia Contract list (via Rebel)
## GH issue
-Research Request - Bus Procurement Costs & Awards #897
+* [Research Request - Bus Procurement Costs & Awards #897](https://github.com/cal-itp/data-analyses/issues/897)
+* [Research Task - Refactor: Bus Procurement Cost #1142](https://github.com/cal-itp/data-analyses/issues/1142)
## Research Question
-Identify federal awards to fund bus purchases and how much agencies pay for them.
+Analyze bus procurement projects to see how much transit agencies pay for them.
## Methodology
- Examine each dataset to:
@@ -49,39 +50,51 @@ Identify federal awards to fund bus purchases and how much agencies pay for them
## Script Explanation
+- **bus_cost_utils.py**
+    * contains all the shared functions and variables used throughout the cleaner scripts
+
+
Executing `make all_bus_scripts` will run the following scripts
+
- **fta_data_cleaner.py:**
* Reads in and cleans FTA data
* outputs 2 files:
- * cleaned, all projects: `fta_all_projects_clean.parquet`
- * cleaned, bus only projects:`fta_bus_cost_clean.parquet`
+ * cleaned, all projects: `clean_fta_all_projects.parquet`
+ * cleaned, bus only projects:`clean_fta_bus_only.parquet`
+
- **tircp_data_cleaner.py**
* Reads in and cleans tircp data
* outputs 2 files:
- * cleaned, all projects: `clean_tircp_project.parquet`
- * cleaned, bus only projects:`clean_tircp_project_bus_only.parquet`
+ * cleaned, all projects: `clean_tircp_all_project.parquet`
+ * cleaned, bus only projects:`clean_tircp_bus_only.parquet`
+
- **dgs_data_cleaner.py**
* Reads in and cleans DGS data
* outputs 2 files:
- * cleaned, bus only projects: `dgs_agg_clean.parquet`
- * cleaned, bus only projects with options:`dgs_agg_w_options_clean.parquet`
+ * cleaned, bus only projects: `clean_dgs_all_projects.parquet`
+ * cleaned, bus only projects with options:`clean_dgs_bus_only_w_options.parquet`
+
- **cost_per_bus_cleaner.py**
* Reads in and merges all the bus only datasets
- * updates columns names
-
-- **cost_per_bus_utils.py**
- * stores variables for summary section (total projects, total buses, etc)
- * stores chart functions to be used in notebook
- * stores the summary and conclusion text.
+ * updates columns names
+    * calculates `cost_per_bus`, z-score and identifies outliers.
+ * outputs 2 files:
+ * cleaned projects: `cleaned_cpb_analysis_data_merge.parquet`
+ * cleaned, removed outliers: `cleaned_no_outliers_cpb_analysis_data_merge.parquet`
+
- **nbconvert --to notebook**
* runs all cells in the `cost_per_bus_analysis.ipynb`
* overwrites the nb in place
+
- **nbconvert --to html**
* converts the nb to HTML
- * hides the code cells and prompts
\ No newline at end of file
+ * hides the code cells and prompts
+
+
+output files are saved to GCS at: `calitp-analytics-data/data-analyses/bus_procurement_cost`
diff --git a/bus_procurement_cost/bus_cost_utils.py b/bus_procurement_cost/bus_cost_utils.py
new file mode 100644
index 000000000..0249d6494
--- /dev/null
+++ b/bus_procurement_cost/bus_cost_utils.py
@@ -0,0 +1,258 @@
+#script with shared functions used throughout the bus cost analysis.
+
+import pandas as pd
+from IPython.display import Markdown, display
+
+GCS_PATH = "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/"
+
+def new_prop_finder(description: str) -> str:
+ """
+ function that matches keywords from each propulsion type list against the item description col, returns a standardized prop type
+ now includes variable that make description input lowercase.
+ to be used with .assign()
+ """
+
+ BEB_list = [
+ "battery electric",
+ "BEBs paratransit buses"
+ ]
+
+ cng_list = [
+ "cng",
+ "compressed natural gas"
+ ]
+
+ electric_list = [
+ "electric buses",
+ "electric commuter",
+ "electric",
+ ]
+
+ FCEB_list = [
+ "fuel cell",
+ "hydrogen",
+ #"fuel cell electric",
+ #"hydrogen fuel cell",
+ #"fuel cell electric bus",
+ #"hydrogen electric bus",
+ ]
+
+ # low emission (hybrid)
+ hybrid_list = [
+ #"diesel electric hybrids",
+ #"diesel-electric hybrids",
+ #"hybrid electric",
+ #"hybrid electric buses",
+ #"hybrid electrics",
+ "hybrids",
+ "hybrid",
+ ]
+
+ # low emission (propane)
+ propane_list = [
+ #"propane buses",
+ #"propaned powered vehicles",
+ "propane",
+ ]
+
+ mix_beb_list = [
+ "2 BEBs and 4 hydrogen fuel cell buses",
+ ]
+
+ mix_lowe_list = [
+ "diesel and gas",
+ ]
+
+ mix_zero_low_list = [
+ "15 electic, 16 hybrid",
+ "4 fuel cell / 3 CNG",
+ "estimated-cutaway vans (PM- award will not fund 68 buses",
+ "1:CNGbus ;2 cutaway CNG buses",
+ ]
+
+ zero_e_list = [
+ #"zero emission buses",
+ #"zero emission electric",
+ #"zero emission vehicles",
+ "zero-emission",
+ "zero emission",
+ ]
+
+ item_description = description.lower().replace("‐", " ").strip()
+
+ if any(word in item_description for word in BEB_list) and not any(
+ word in item_description for word in ["diesel", "hybrid", "fuel cell"]
+ ):
+ return "BEB"
+
+ elif any(word in item_description for word in FCEB_list):
+ return "FCEB"
+
+ elif any(word in item_description for word in hybrid_list):
+ return "low emission (hybrid)"
+
+ elif any(word in item_description for word in mix_beb_list):
+ return "mix (BEB and FCEB)"
+
+ elif any(word in item_description for word in mix_lowe_list):
+ return "mix (low emission)"
+
+ elif any(word in item_description for word in mix_zero_low_list):
+ return "mix (zero and low emission)"
+
+ elif any(word in item_description for word in zero_e_list):
+ return "zero-emission bus (not specified)"
+
+ elif any(word in item_description for word in propane_list):
+ return "low emission (propane)"
+
+ elif any(word in item_description for word in electric_list):
+ return "electric (not specified)"
+
+ elif any(word in item_description for word in cng_list):
+ return "CNG"
+
+ else:
+ return "not specified"
+
+def new_bus_size_finder(description: str) -> str:
+ """
+ Similar to prop_type_find, matches keywords to item description col and return standardized bus size type.
+ now includes variable that make description input lowercase.
+ To be used with .assign()
+ """
+
+ articulated_list = [
+ "60 foot",
+ "articulated",
+ ]
+
+ standard_bus_list = [
+ "30 foot",
+ "35 foot",
+ "40 foot",
+ "40ft",
+ "45 foot",
+ "standard",
+ ]
+
+ cutaway_list = [
+ "cutaway",
+ ]
+
+ other_bus_size_list = ["feeder bus"]
+
+ otr_bus_list = [
+ "coach style",
+ "over the road",
+ ]
+
+ item_description = description.lower().replace("-", " ").strip()
+
+ if any(word in item_description for word in articulated_list):
+ return "articulated"
+
+ elif any(word in item_description for word in standard_bus_list):
+ return "standard/conventional (30ft-45ft)"
+
+ elif any(word in item_description for word in cutaway_list):
+ return "cutaway"
+
+ elif any(word in item_description for word in otr_bus_list):
+ return "over-the-road"
+
+ elif any(word in item_description for word in other_bus_size_list):
+ return "other"
+
+ else:
+ return "not specified"
+
+def project_type_finder(description: str) -> str:
+ """
+ function to match keywords to project description col to identify projects that only have bus procurement.
+ used to identify projects into diffferent categories: bus only, bus + others, no bus procurement.
+ use with .assign() to get a new col.
+ """
+ bus_list =[
+ "bus",
+ "transit vehicles",# for fta list
+ "cutaway vehicles",# for fta list
+ "zero-emission vehicles", # for tircp list
+ "zero emission vehicles",
+ "zero‐emissions vans",
+ "hybrid-electric vehicles",
+ "battery-electric vehicles",
+ "buy new replacement vehicles", # specific string for fta list
+ ]
+
+ exclude_list =[
+ "facility",
+ #"station",
+ "stops",
+ "installation",
+ "depot",
+ "construct",
+ "infrastructure",
+ "signal priority",
+ "improvements",
+ "build",
+ "chargers",
+ "charging equipment",
+ "install",
+ "rail",
+ "garage",
+ "facilities",
+ "bus washing system",
+ "build a regional transit hub" # specific string needed for fta list
+ #"associated infrastructure" may need to look at what is associated infrastructure is for ZEB
+
+ ]
+ proj_description = description.lower().strip()
+
+ if any(word in proj_description for word in bus_list) and not any(
+ word in proj_description for word in exclude_list
+ ):
+ return "bus only"
+
+ elif any(word in proj_description for word in exclude_list) and not any(
+ word in proj_description for word in bus_list
+ ):
+ return "non-bus components"
+
+ elif any(word in proj_description for word in exclude_list) and any(
+ word in proj_description for word in bus_list
+ ):
+ return "includes bus and non-bus components"
+
+ else:
+ return "needs review"
+
+def col_row_updater(df: pd.DataFrame, col1: str, val1, col2: str, new_val):
+ """
+ function used to update values at specificed columns and row value.
+ """
+ df.loc[df[col1] == val1, col2] = new_val
+
+ return
+
+#def bus_min_max_summary(data:pd.DataFrame, col1:str, col_list=["transit_agency",
+# "total_agg_cost",
+# "total_bus_count",
+# "new_cost_per_bus"]):
+# """
+# function to display min/max of specific column in aggregated bus df.
+
+# """
+
+# return display(Markdown(f"**Max {col1}**"),
+# data[data[col1] == data[col1].max()][col_list],
+# Markdown(f"**Min {col1}**"),
+# data[data[col1] == data[col1].min()][col_list]
+# )
+
+def outlier_flag(col):
+ """
+ function to flag outlier rows. use with .apply()
+ """
+
+ return col <= -3 or col >= 3
\ No newline at end of file
diff --git a/bus_procurement_cost/cost_per_bus_analysis.html b/bus_procurement_cost/cost_per_bus_analysis.html
index b77c9e00d..cf6a8338e 100644
--- a/bus_procurement_cost/cost_per_bus_analysis.html
+++ b/bus_procurement_cost/cost_per_bus_analysis.html
@@ -14575,6 +14575,36 @@
This analysis examines the cost of buses for transit agencies across the county. Specifically, to observe the variation of bus cost for propulsion type and bus sizes.
This analysis examines the cost of buses for transit agencies across the county. Specifically, to observe the variation of bus cost for propulsion type.
Data was compiled from three data sources:
-
130 projects from FTA Bus and Low- and No-Emission Grant Awards press release (federally funded, nationwide data)
-
124 projects TIRCP project data (state-funded, California only)
-
35 projects DGS usage report for all procurements from California agencies purchasing from New Flyer and Portera Inc..
+
FTA Bus and Low- and No-Emission Grant Awards press release (federally funded, nationwide data)
+
TIRCP project data (state-funded, California only data)
+
DGS usage report for all procurements from California agencies purchasing from New Flyer and Portera Inc..
-
The compiled dataset includes 289 total transit related projects. However, the initial dataset included projects that encompassed bus procurement and other components such as charging installation and facility construction, as well as non-bus related projects (ferries, trains). The dataset was filtered to exclude projects that were not bus related, indicated 0 buses procured, and projects that contained construction/installation work. 87 projects remained that specified the number of buses to procure and explicitly described procuring buses (bus only projects).
-
Number of bus only contracts from each dataset
+
The initial dataset included nearly 300 projects. It was reduced to 88 projects after applying criteria to exclude non-bus related work.
+Projects involving the construction of new facilities, training programs, or the procurement of non-bus items such as trains and ferries were excluded.
+The final dataset comprised only projects focused on bus procurement.
+
These projects were aggregated against propulsion type and bus size type, and categorized by ZEB and non-ZEB.
+
Breakdown of each data source showing the total buses and cost for each source:
+
+
+
+
source
+
bus_count
+
total_cost
+
cost_per_bus
+
+
+
+
+
dgs
+
236.0
+
250112853
+
1059800
+
+
+
fta
+
883.0
+
391257025
+
443099
+
+
+
tircp
+
233.0
+
187250513
+
803650
+
+
+
Grand Total
+
1352.0
+
828620391
+
612884
+
+
+
ZEB projects are categorized into the following propulsion types:
-
FTA: 43
-
TIRCP: 9
-
DGS: 35
+
zero-emission (not specified)
+
electric (not specified)
+
battery electric
+
fuel cell electric
-
The remaining bus only projects were categorized into different propulsion types and bus sizes, a “cost per bus” value was calculated, and outliers removed.
-
A overall summary is provided below:
+
Non-ZEB projects include the following propulsion types:
-
Total projects: 298
-
Number of projects with mix bus procurement and other components, also non-bus projects: 204
-
Number of bus only projects: 87
-
Total dollars awarded to bus only projects: $831,843,715.00
-
Total number of buses: 1353.0
-
Most common propulsion type procured for bus only projects: BEB at 30 projects
-
Number of ZEB buses* procured: 452.0
-
Number of non-ZEB buses procured: 575.0**
-
Overall average cost per bus (ZEB & non-ZEB) is $792,635.34 (std $396,712.61)
-
ZEB average cost per bus is $1,056,659.30 (std $253,737.82)
-
Non-ZEB average cost per bus is $528,106.49 (std $315,932.20)
+
compressed natural gas (CNG)
+
ethanol
+
low-emission (hybrid, propane)
+
diesel
+
gas
-
*ZEB buses include: zero-emission (not specified), electric (not specified), battery electric, fuel cell electric
Based on the findings so far in bus only projects, there is evidence that bus procurement cost vary widely amongst transit agencies all over the country. Non-ZEB bus cost variation was wide. Whereas ZEB cost variation was much tighter. However ZEBs do have a higher cost per bus than non-ZEB.
-
Most of the bus only projects were for non-ZEBs. This can be explained by looking into the initial project list. Lots of projects that procured ZEBs also included the installation of chargers and related charging infrastructure. Indicating that transit agencies are still adopting and preparing for ZEBs and need to make the initial investment in the equipment.
Based on these findings, The average cost of a ZEB, throughout the US, is ~$1,000,000, roughly twice the price of a conventional, non-ZEB.
The variance in cost depends mainly on the options the Transit
Agencies choose. Highly optioned/customized buses contribute to high cost.
Unfortunately, analyzing the cost of configurable options is outside the scope of data provided.
diff --git a/bus_procurement_cost/cost_per_bus_analysis.ipynb b/bus_procurement_cost/cost_per_bus_analysis.ipynb
index 567f6cea7..a47c83eda 100644
--- a/bus_procurement_cost/cost_per_bus_analysis.ipynb
+++ b/bus_procurement_cost/cost_per_bus_analysis.ipynb
@@ -6,10 +6,10 @@
"id": "da041e43-e8e2-4d4b-a498-10a7c0afe43f",
"metadata": {
"execution": {
- "iopub.execute_input": "2024-03-29T21:43:20.780383Z",
- "iopub.status.busy": "2024-03-29T21:43:20.778571Z",
- "iopub.status.idle": "2024-03-29T21:43:44.073054Z",
- "shell.execute_reply": "2024-03-29T21:43:44.071725Z"
+ "iopub.execute_input": "2024-06-26T23:31:22.076309Z",
+ "iopub.status.busy": "2024-06-26T23:31:22.075847Z",
+ "iopub.status.idle": "2024-06-26T23:31:41.268659Z",
+ "shell.execute_reply": "2024-06-26T23:31:41.267222Z"
},
"tags": []
},
@@ -20,103 +20,1438 @@
"import pandas as pd\n",
"import seaborn as sns\n",
"import shared_utils\n",
- "from cost_per_bus_utils import *\n",
+ "from bus_cost_utils import *\n",
"from IPython.display import Markdown, display\n",
"from matplotlib.ticker import ScalarFormatter\n",
- "from scipy.stats import zscore"
+ "from scipy.stats import zscore\n",
+ "from IPython.display import Markdown, display"
]
},
{
"cell_type": "code",
"execution_count": 2,
- "id": "c51fe7dd-22e2-4686-b1a5-57b2f5ad8602",
+ "id": "d53376d9-d4b4-48b7-9916-5b9f633fbaf0",
"metadata": {
"execution": {
- "iopub.execute_input": "2024-03-29T21:43:44.082755Z",
- "iopub.status.busy": "2024-03-29T21:43:44.080809Z",
- "iopub.status.idle": "2024-03-29T21:43:44.091937Z",
- "shell.execute_reply": "2024-03-29T21:43:44.091025Z"
+ "iopub.execute_input": "2024-06-26T23:31:41.275269Z",
+ "iopub.status.busy": "2024-06-26T23:31:41.273892Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.396698Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.395233Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "merged_data = pd.read_parquet(f'{GCS_PATH}cleaned_no_outliers_cpb_analysis_data_merge.parquet')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "a45f2d3d-a600-4fe6-80cf-6b887036faab",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.402301Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.401073Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.406919Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.406184Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# for subsetting ZEB and nonZEB\n",
+ "zeb_list =[\n",
+ " \"BEB\",\n",
+ " \"FCEB\",\n",
+ " \"electric (not specified)\",\n",
+ " \"zero-emission bus (not specified)\",\n",
+ "]\n",
+ "\n",
+ "non_zeb_list =[\n",
+ " \"CNG\",\n",
+ " \"ethanol\",\n",
+ " \"low emission (hybrid)\",\n",
+ " \"low emission (propane)\",\n",
+ " \"mix (zero and low emission)\",\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "8ac40482-ba3e-4fde-8c05-806e3725de44",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.411899Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.410719Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.424045Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.422703Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# means and standard deviations\n",
+ "# for graphs\n",
+ "cpb_mean = merged_data[\"cost_per_bus\"].mean()\n",
+ "cpb_std = merged_data[\"cost_per_bus\"].std()\n",
+ "\n",
+ "#testing weighted average calculation for sub-set non-zeb and zeb\n",
+ "zeb_cpb_wt_avg = (merged_data[merged_data[\"prop_type\"].isin(zeb_list)][\"total_cost\"].sum() / merged_data[merged_data[\"prop_type\"].isin(zeb_list)][\"bus_count\"].sum())\n",
+ "non_zeb_cpb_wt_avg = (merged_data[merged_data[\"prop_type\"].isin(non_zeb_list)][\"total_cost\"].sum() / merged_data[merged_data[\"prop_type\"].isin(non_zeb_list)][\"bus_count\"].sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "d450fd60-cced-453b-b20b-62cdade0d7a6",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.429985Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.428735Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.438675Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.437203Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def new_cpb_aggregate(df: pd.DataFrame, column=\"transit_agency\") -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " function to aggregate compiled data by different categories:\n",
+ " \"transit agency\", \n",
+ " \"propulsion type\", \n",
+ " \"bus_size_type\",\n",
+ " \"new_project_type\"\n",
+ " aggregate on columns:\n",
+ " \"project_title\"\n",
+ " \"ppno\"\n",
+ " \"total_cost\"\n",
+ " \"bus_count\"\n",
+ " \n",
+ " Then, cost per bus is calculated AFTER the aggregation.\n",
+ " \"\"\"\n",
+ " df_agg = (\n",
+ " df.groupby(column)\n",
+ " .agg(\n",
+ " total_project_count=(\"project_title\", \"count\"),\n",
+ " total_project_count_ppno=(\"ppno\", \"count\"),\n",
+ " total_agg_cost=(\"total_cost\", \"sum\"),\n",
+ " total_bus_count=(\"bus_count\", \"sum\"),\n",
+ " #new_prop_type=(\"prop_type\",\"max\")\n",
+ " )\n",
+ " .reset_index()\n",
+ " )\n",
+ " df_agg[\"new_cost_per_bus\"] = (df_agg[\"total_agg_cost\"] / df_agg[\"total_bus_count\"]).astype(\"int64\")\n",
+ " \n",
+ " #calculate zscore\n",
+ " df_agg[\"new_zscore_cost_per_bus\"] = zscore(df_agg[\"new_cost_per_bus\"])\n",
+ " \n",
+ " #flag outliers\n",
+ " df_agg[\"new_is_cpb_outlier?\"] = df_agg[\"new_zscore_cost_per_bus\"].apply(outlier_flag)\n",
+ " \n",
+ " return df_agg"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "2a2dc407-20cc-45de-84b1-bb5991dad8ac",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.443616Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.443245Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.449251Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.448129Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def bus_min_max_summary(data:pd.DataFrame, col1:str, col_list=[\"transit_agency\",\n",
+ " \"total_agg_cost\",\n",
+ " \"total_bus_count\",\n",
+ " \"new_cost_per_bus\"]):\n",
+ " \"\"\"\n",
+ " function to display min/max of specific column in aggregated bus df.\n",
+ " \n",
+ " \"\"\"\n",
+ "\n",
+ " return display(Markdown(f\"**Max {col1}**\"),\n",
+ " data[data[col1] == data[col1].max()][col_list],\n",
+ " Markdown(f\"**Min {col1}**\"),\n",
+ " data[data[col1] == data[col1].min()][col_list])\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "9a6a7ecf-5180-4691-84fe-23aa68cdae93",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.454688Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.453665Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.462034Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.460971Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def make_chart(y_col: str, title: str, data: pd.DataFrame, x_col: str):\n",
+ " \"\"\"\n",
+    "    function to create chart. sorts values by y_col descending and plots the top 10 rows.\"\"\"\n",
+ " \n",
+ " data.sort_values(by=y_col, ascending=False).head(10).plot(\n",
+ " x=x_col, y=y_col, kind=\"bar\", color=\"skyblue\"\n",
+ " )\n",
+ " plt.title(title)\n",
+ " plt.xlabel(x_col)\n",
+ " plt.ylabel(y_col)\n",
+ "\n",
+ " plt.ticklabel_format(style=\"plain\", axis=\"y\")\n",
+ " plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "44d21201-223f-4e6c-b238-b72fba984544",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.466539Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.466167Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.475338Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.474135Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def dist_curve(\n",
+ " df: pd.DataFrame,\n",
+ " mean: str,\n",
+ " std: str,\n",
+ " title: str,\n",
+ " xlabel: str,\n",
+ "):\n",
+ " \"\"\"\n",
+    "    function to make distribution curve. uses the \"cost_per_bus\" column of the df.\n",
+ " \"\"\"\n",
+ " sns.histplot(df[\"cost_per_bus\"], kde=True, color=\"skyblue\", bins=20)\n",
+ " # mean line\n",
+ " plt.axvline(\n",
+ " mean, color=\"red\", linestyle=\"dashed\", linewidth=2, label=f\"Mean: ${mean:,.2f}\"\n",
+ " )\n",
+ " # mean+1std\n",
+ " plt.axvline(\n",
+ " mean + std,\n",
+ " color=\"green\",\n",
+ " linestyle=\"dashed\",\n",
+ " linewidth=2,\n",
+ " label=f\"Standard Deviation: ${std:,.2f}\",\n",
+ " )\n",
+ " plt.axvline(mean - std, color=\"green\", linestyle=\"dashed\", linewidth=2)\n",
+ " plt.axvline(mean + (std * 2), color=\"green\", linestyle=\"dashed\", linewidth=2)\n",
+ " plt.axvline(mean + (std * 3), color=\"green\", linestyle=\"dashed\", linewidth=2)\n",
+ "\n",
+ " plt.title(title + \" with Mean and Standard Deviation\")\n",
+ " plt.xlabel(xlabel)\n",
+ " plt.ylabel(\"Frequency\")\n",
+ "\n",
+ " # Turn off scientific notation on x-axis?\n",
+ " plt.gca().xaxis.set_major_formatter(ScalarFormatter(useMathText=False))\n",
+ "\n",
+ " plt.legend()\n",
+ " plt.show()\n",
+ "\n",
+ " return"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "067a14a5-5c77-4914-82a8-c5eeb170cb08",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.480883Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.480520Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.540136Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.538977Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# aggregating by big categories\n",
+ "agg_agency = new_cpb_aggregate(merged_data)\n",
+ "agg_prop = new_cpb_aggregate(merged_data, column=\"prop_type\")\n",
+ "agg_bus_size = new_cpb_aggregate(merged_data, column=\"bus_size_type\")\n",
+ "agg_source = new_cpb_aggregate(merged_data, column=\"source\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "49a97d01-b17e-475c-b351-67426f3741d9",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.545384Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.544807Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.552797Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.551294Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# subsetting ZEB and nonZEB data\n",
+ "zeb_projects = merged_data[merged_data[\"prop_type\"].isin(zeb_list)]\n",
+ "\n",
+ "non_zeb_projects = merged_data[merged_data[\"prop_type\"].isin(non_zeb_list)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "4faaa4ad-b16c-4e6b-87c7-d12f7e7db3c6",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.558015Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.557625Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.583902Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.581389Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "#pivot table to get totals for each prop type\n",
+ "\n",
+ "pivot_prop_type = pd.pivot_table(\n",
+ " merged_data,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " index = \"prop_type\",\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ").reset_index()\n",
+ "pivot_prop_type[\"cost_per_bus\"] = (pivot_prop_type[\"total_cost\"] / pivot_prop_type[\"bus_count\"]).astype(\"int64\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "b8535e97-e7bf-4d7e-b718-24c5758b0ccd",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.593655Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.593253Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.617829Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.616633Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "#pivot for ZEB data\n",
+ "\n",
+ "pivot_zeb_prop = pd.pivot_table(\n",
+ " #filted incoming DF for zeb prop types\n",
+ " zeb_projects,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " index = \"prop_type\",\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ").reset_index() \n",
+ "\n",
+ "pivot_zeb_prop[\"cost_per_bus\"] = (pivot_zeb_prop[\"total_cost\"] / pivot_zeb_prop[\"bus_count\"]).astype(\"int64\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "829e38c9-3f9b-4e82-92a8-c86f81051580",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.623085Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.622309Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.647068Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.645629Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "#pivot for non-ZEB data\n",
+ "\n",
+ "pivot_non_zeb_prop = pd.pivot_table(\n",
+ " #filted incoming DF for non-zeb prop types\n",
+ " non_zeb_projects,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " index = \"prop_type\",\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ").reset_index()\n",
+ "\n",
+ "pivot_non_zeb_prop[\"cost_per_bus\"] = (pivot_non_zeb_prop[\"total_cost\"] / pivot_non_zeb_prop[\"bus_count\"]).astype(\"int64\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "0a2163e3-dac1-4e64-a551-3dc961e44714",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.652492Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.651667Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.676873Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.675765Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# pivot for bus sizes\n",
+ "\n",
+ "pivot_size = pd.pivot_table(\n",
+ " merged_data,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " # multi-index pivot\n",
+ " index = [\"prop_type\",\"bus_size_type\"],\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ")\n",
+ "\n",
+ "pivot_size[\"cost_per_bus\"] = (pivot_size[\"total_cost\"] / pivot_size[\"bus_count\"]).astype(\"int64\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "074acb8d-de54-43a0-b243-a070ecfbe1ce",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.682116Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.681322Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.706549Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.705488Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "#pivot for data sources\n",
+ "pivot_source = pd.pivot_table(\n",
+ " merged_data,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " index = \"source\",\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ").reset_index()\n",
+ "\n",
+ "pivot_source[\"cost_per_bus\"] = (pivot_source[\"total_cost\"] / pivot_source[\"bus_count\"]).astype(\"int64\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "d8356953-e32d-47ab-b67c-fa016cad9c50",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.711308Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.710956Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.721845Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.720467Z"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# new summary\n",
+ "\n",
+ "new_summary = f\"\"\"\n",
+ "\n",
+ "# Bus Procurement Cost Analysis\n",
+ "\n",
+ "## Summary\n",
+ "This analysis examines the cost of buses for transit agencies across the country. Specifically, it observes how bus cost varies by propulsion type.\n",
+ "\n",
+ "Data was compiled from three data sources:\n",
+ "1. FTA Bus and Low- and No-Emission Grant Awards press release (federally funded, nationwide data)\n",
+ "2. TIRCP project data (state-funded, California only data)\n",
+ "3. DGS usage report for all procurements from California agencies purchasing from New Flyer and Proterra Inc. \n",
+ "\n",
+ "The initial dataset included nearly 300 projects. It was reduced to {len(merged_data)} projects after applying criteria to exclude non-bus related work. \n",
+ "Projects involving the construction of new facilities, training programs, or the procurement of non-bus items such as trains and ferries were excluded. \n",
+ "The final dataset comprised only projects focused on bus procurement. \n",
+ "\n",
+ "\n",
+ "These projects were aggregated against propulsion type and bus size type, and categorized by ZEB and non-ZEB.\n",
+ "\n",
+ "\n",
+ "Breakdown of each data source showing the total buses and cost for each source:\n",
+ "{pivot_source.to_html(index= False)}\n",
+ "\n",
+ "\n",
+ "**ZEB projects are categorized into the following propulsion types:**\n",
+ "- zero-emission (not specified) \n",
+ "- electric (not specified)\n",
+ "- battery electric \n",
+ "- fuel cell electric\n",
+ "\n",
+ "**Non-ZEB projects include the following propulsion types:**\n",
+ "- compressed natural gas (CNG) \n",
+ "- ethanol \n",
+ "- low-emission (hybrid, propane) \n",
+ "- diesel \n",
+ "- gas\n",
+ "\n",
+ "Below are charts and tables that summarize the findings.\n",
+ "\n",
+ "\"\"\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "f64881b9-46f9-4bfe-afd0-511385e21306",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-06-26T23:31:42.726916Z",
+ "iopub.status.busy": "2024-06-26T23:31:42.726027Z",
+ "iopub.status.idle": "2024-06-26T23:31:42.736211Z",
+ "shell.execute_reply": "2024-06-26T23:31:42.734904Z"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "\n",
+ "\n",
+ "# Bus Procurement Cost Analysis\n",
+ "\n",
+ "## Summary\n",
+ "This analysis examines the cost of buses for transit agencies across the county. Specifically, to observe the variation of bus cost for propulsion type.\n",
+ "\n",
+ "Data was compiled from three data sources:\n",
+ "1. FTA Bus and Low- and No-Emission Grant Awards press release (federally funded, nationwide data)\n",
+ "2. TIRCP project data (state-funded, California only data)\n",
+ "3. DGS usage report for all procurements from California agencies purchasing from New Flyer and Portera Inc.. \n",
+ "\n",
+ "The initial dataset included nearly 300 projects. It was reduced to 88 projects after applying criteria to exclude non-bus related work. \n",
+ "Projects involving the construction of new facilities, training programs, or the procurement of non-bus items such as trains and ferries were excluded. \n",
+ "The final dataset comprised only projects focused on bus procurement. \n",
+ "\n",
+ "\n",
+ "These projects were aggregated against propulsion type and bus size type, and categorized by ZEB and non-ZEB.\n",
+ "\n",
+ "\n",
+ "Breakdown of each data souce showing the total buses and cost for each source:\n",
+ "
"
+ ],
+ "text/plain": [
+ " bus_size_type total_bus_count\n",
+ "0 articulated 41.0\n",
+ "1 cutaway 152.0\n",
+ "2 not specified 881.0\n",
+ "3 over-the-road 14.0\n",
+ "4 standard/conventional (30ft-45ft) 264.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
- "# COST PER BUS BY PROP TYPE\n",
- "display(Markdown(cpb_prop_type_desc))\n",
- "make_chart(\"cpb\", \"Cost per bus by propulsion type\", x_col=\"prop_type\", data=prop_agg)"
+ "#bus size bar chart\n",
+ "display(Markdown(\n",
+ " \"## What are the total bus counts for each bus size category?\"\n",
+ "))\n",
+ "\n",
+ "display(\n",
+ "make_chart(\"total_bus_count\", \"\"\"Bus Size Count.\n",
+ "excluding 'not specified' responses.\"\"\", x_col=\"bus_size_type\",data=agg_bus_size[agg_bus_size[\"bus_size_type\"]!=\"not specified\"]),\n",
+ "agg_bus_size[[\"bus_size_type\",\"total_bus_count\"]]\n",
+ ")\n"
]
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "7462b55c-29ef-4909-a7dd-27e1c84157d0",
+ "execution_count": 31,
+ "id": "8d030948-59ea-4ea5-9db6-5d8639f6f8f5",
"metadata": {
"execution": {
- "iopub.execute_input": "2024-03-29T21:43:45.534497Z",
- "iopub.status.busy": "2024-03-29T21:43:45.534166Z",
- "iopub.status.idle": "2024-03-29T21:43:45.716339Z",
- "shell.execute_reply": "2024-03-29T21:43:45.715359Z"
- },
- "tags": []
+ "iopub.execute_input": "2024-06-26T23:31:44.319904Z",
+ "iopub.status.busy": "2024-06-26T23:31:44.319501Z",
+ "iopub.status.idle": "2024-06-26T23:31:44.340895Z",
+ "shell.execute_reply": "2024-06-26T23:31:44.339535Z"
+ }
},
"outputs": [
{
"data": {
"text/markdown": [
- "\n",
- "## Bus count by propulsion type. \n"
+ "## What is the breakdown of ZEB Propulsion Type and Bus Size Category?"
],
"text/plain": [
""
@@ -327,9 +1974,119 @@
},
{
"data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAKfCAYAAACIWwmjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAA9hAAAPYQGoP6dpAACWyklEQVR4nOzdd1QU5/s28GtB6VWliCJiRRQUNSpqEEuwxR5ji2JviEbsSSxoEss3GjUaS4xi79HEXlCxYUcsWMAGKogNEJH+vH/4c15XUAGB2R2vzzl7DvvM7O41uLL3zjxFJYQQICIiIlIoHbkDEBERERUkFjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtEVKB69+4NExMTuWNoPE9PT3h6eubqMQEBAVCpVLh7926BZCJSChY79Nl784Hx9s3a2hqNGzfGnj175I6nccLCwjBlyhR+wH7mfv31V2zfvl3uGEQ5wmKH6P9MnToVq1evxqpVqzB27Fg8fvwYrVq1ws6dO+WOplHCwsLg7+/PYkcD9OzZE69evYKDg0OhvzaLHdImReQOQKQpWrZsidq1a0v3+/XrBxsbG6xfvx5ff/21jMmooL18+RLGxsZyx8g1XV1d6Orqyh2DSOPxzA7Re1hYWMDQ0BBFivz/7wRHjhyBSqXCkSNH1Pa9e/cuVCoVAgICpLaYmBj06dMHpUuXhr6+PkqWLIl27drl6IzI9evX8e2338LKygqGhoaoXLkyfvzxR7V9QkJC0LJlS5iZmcHExARNmzbFqVOn1PaZMmUKVCpVlufPrq9H2bJl8fXXX+P48eOoU6cODAwMUK5cOaxatUrtcZ07dwYANG7cWLrs9+7vIzu3b99G8+bNYWxsDDs7O0ydOhVCCACAEAJly5ZFu3btsjwuOTkZ5ubmGDRo0AefX6VSYdiwYVi7di0qV64MAwMD1KpVC0ePHs32dxIWFobu3bvD0tISDRs2BACkp6dj2rRpKF++PPT19VG2bFn88MMPSElJUXuON7+r/fv3o0aNGjAwMICzszP++eefbF/rXTnta/PHH3+gatWqMDIygqWlJWrXro1169Z99Hn+/PNPVK1aFfr6+rCzs4OPjw/i4uLU9vH09ES1atUQFhaGxo0bw8jICKVKlcKsWbM+mAl4/bt++fIlVq5cKb0HevfujcOHD0OlUmHbtm1ZHrNu3TqoVCoEBwcD+P99uT70vngjMzMTc+fORdWqVWFgYAAbGxsMGjQIz58//2hWIoDFDpEkPj4eT548wePHj3H16lUMGTIEiYmJ+O677/L0fJ06dcK2bdvQp08f/Pnnnxg+fDhevHiByMjIDz7u0qVLqFu3Lg4dOoQBAwZg3rx5aN++PXbs2CHtc/XqVXz55ZcIDQ3F2LFjMXHiRNy5cweenp44ffp0nvICQEREBL755ht89dVXmD17NiwtLdG7d29cvXoVAODh4YHhw4cDAH744QesXr0aq1evRpUqVT74vBkZGWjRogVsbGwwa9Ys1KpVC5MnT8bkyZMBvP7w/O6777Bnzx48e/ZM7bE7duxAQkJCjv4dgoKC8P333+O7777D1KlT8fTpU7Ro0QJXrlzJsm/nzp2RlJSEX3/9FQMGDAAA9O/fH5MmTULNmjXx+++/o1GjRpg+fTq6du2a5fHh4eHo0qULWrZsienTp6NIkSLo3LkzDhw48NGcOfHXX39h+PDhcHZ2xty5c+Hv748aNWp89N93ypQp8PHxgZ2dHWbPno1OnTphyZIl8PLyQlpamtq+z58/R4sWLVC9enXMnj0bTk5OGDdu3Ef7qq1evRr6+vr48ssvpffAoEGD4OnpCXt7e6xduzbLY9auXYvy5cvD3d1davvY++KNQYMGYcyYMWjQoAHmzZuHPn36YO3atWjevHmWYyLKliD6zK1YsUIAyHLT19cXAQEBavse
PnxYABCHDx9Wa79z544AIFasWCGEEOL58+cCgPjf//6X6zweHh7C1NRU3Lt3T609MzNT+rl9+/ZCT09P3Lp1S2p7+PChMDU1FR4eHlLb5MmTRXb/zd8c8507d6Q2BwcHAUAcPXpUaouNjRX6+vpi1KhRUtvmzZuz/R28j7e3twAgfH191Y6ldevWQk9PTzx+/FgIIcSNGzcEALFo0SK1x7dt21aULVtW7fiz8+bf7dy5c1LbvXv3hIGBgejQoYPU9uZ30q1bN7XHX7x4UQAQ/fv3V2sfPXq0ACAOHToktb35XW3dulVqi4+PFyVLlhRubm5ZXutd2f3+GzVqJBo1aiTdb9eunahateoHj/nd54mNjRV6enrCy8tLZGRkSPstWLBAABDLly9Xez0AYtWqVVJbSkqKsLW1FZ06dfrg6wohhLGxsfD29s7SPmHCBKGvry/i4uKkttjYWFGkSBExefJkqS2n74tjx44JAGLt2rVqr7N3795s24mywzM7RP9n4cKFOHDgAA4cOIA1a9agcePG6N+/f5ZLEzlhaGgIPT09HDlyJFen2h8/foyjR4+ib9++KFOmjNq2N5dDMjIysH//frRv3x7lypWTtpcsWRLdu3fH8ePHkZCQkOvMAODs7Iwvv/xSum9lZYXKlSvj9u3beXq+tw0bNkz6+c0lp9TUVBw8eBAAUKlSJdStW1ftrMCzZ8+wZ88e9OjRI9vLQe9yd3dHrVq1pPtlypRBu3btsG/fPmRkZKjtO3jwYLX7u3fvBgD4+fmptY8aNQoAsGvXLrV2Ozs7dOjQQbpvZmaGXr16ISQkBDExMR/N+jEWFha4f/8+zp49m+PHHDx4EKmpqfj++++ho/P//7wPGDAAZmZmWY7BxMRE7YyZnp4e6tSp80n/3r169UJKSgq2bNkitW3cuBHp6enZnp372Pti8+bNMDc3x1dffYUnT55It1q1asHExASHDx/Oc1b6fLDYIfo/derUQbNmzdCsWTP06NEDu3btgrOzs/THNzf09fUxc+ZM7NmzBzY2NvDw8MCsWbM++iH45kOmWrVq793n8ePHSEpKQuXKlbNsq1KlCjIzMxEVFZWrvG+8W2ABgKWl5Sf3jdDR0VErzIDXxQ0Atf4mvXr1wokTJ3Dv3j0Arz/o0tLS0LNnzxy9TsWKFbO0VapUCUlJSXj8+LFau6Ojo9r9e/fuQUdHBxUqVFBrt7W1hYWFhZTpjQoVKmQpwLI7prwaN24cTExMUKdOHVSsWBE+Pj44ceLEBx/zJuO77w09PT2UK1cuyzGULl06yzF86r+3k5MTvvjiC7Wide3atahXr16W321O3hfh4eGIj4+HtbU1rKys1G6JiYmIjY3Nc1b6fLDYIXoPHR0dNG7cGNHR0QgPDweA955dePesAQB8//33uHnzJqZPnw4DAwNMnDgRVapUQUhISIHmfltu8gJ478ge8U6H0YLStWtXFC1aVPqgXLNmDWrXrp1tYfepDA0Ns23PyRmknMrt7/9tVapUwY0bN7BhwwY0bNgQW7duRcOGDbP0Z/kUBfXv3atXLwQFBeH+/fu4desWTp06lee+b5mZmbC2tpbOur57mzp16idlpc8Dix2iD0hPTwcAJCYmAnj9rRdAlpEt735jfqN8+fIYNWoU9u/fjytXriA1NRWzZ89+7+u9+ZabXYfaN6ysrGBkZIQbN25k2Xb9+nXo6OjA3t4+T3lzIi/FQGZmZpZLIzdv3gTwemTTG8WKFUPr1q2xdu1a3Lt3DydOnMjxWR0AUlH67usYGRnBysrqg491cHBAZmZmlud49OgR4uLissxlExERkaUoePeYPvX3b2xsjC5dumDFihWIjIxE69at8csvvyA5Ofm9xwAgy3sjNTUVd+7cydf5eD70PujatSt0dXWxfv16rF27FkWLFkWXLl2y7JeT90X58uXx9OlTNGjQQDrz+vatevXq+XZMpFwsdojeIy0tDfv374eenp402sjBwQG6
urpZhjP/+eefaveTkpKyfCCVL18epqamWYYxv83KygoeHh5Yvnx5llFbbz5YdXV14eXlhX///VftcsmjR4+wbt06NGzYEGZmZtJrAlDL+2bIcF69mY/m3Q/wj1mwYIH0sxACCxYsQNGiRdG0aVO1/Xr27ImwsDCMGTMGurq62Y6Eep/g4GBcuHBBuh8VFYV///0XXl5eH52PplWrVgCAuXPnqrXPmTMHANC6dWu19ocPH6oNsU5ISMCqVatQo0YN2NraAvi03//Tp0/V7uvp6cHZ2RlCiPeOQGrWrBn09PQwf/58tULs77//Rnx8fJZj+BTGxsbvfQ+UKFECLVu2xJo1a7B27Vq0aNECJUqUyHbfj70vvv32W2RkZGDatGlZHpuenp7r9yF9njipINH/2bNnD65fvw4AiI2Nxbp16xAeHo7x48dLxYO5uTk6d+6MP/74AyqVCuXLl8fOnTuz9Bu4efMmmjZtim+//RbOzs4oUqQItm3bhkePHn30w3v+/Plo2LAhatasiYEDB8LR0RF3797Frl27cPHiRQDAzz//jAMHDqBhw4YYOnQoihQpgiVLliAlJUVtnhQvLy+UKVMG/fr1k4qH5cuXw8rK6qND4N+nRo0a0NXVxcyZMxEfHw99fX00adIE1tbW732MgYEB9u7dC29vb9StWxd79uzBrl278MMPP2Q549K6dWsUL14cmzdvRsuWLT/4vO+qVq0amjdvjuHDh0NfX18qQv39/T/62OrVq8Pb2xtLly5FXFwcGjVqhDNnzmDlypVo3749GjdurLZ/pUqV0K9fP5w9exY2NjZYvnw5Hj16hBUrVkj7fMrv38vLC7a2tmjQoAFsbGxw7do1LFiwAK1bt4apqWm2j7GyssKECRPg7++PFi1aoG3btrhx4wb+/PNPfPHFF3m+lJSdWrVq4eDBg5gzZw7s7Ozg6OiIunXrStt79eqFb775BgCyLVSAnL0vGjVqhEGDBmH69Om4ePEivLy8ULRoUYSHh2Pz5s2YN2+e9DpE7yXfQDAizZDd0HMDAwNRo0YNsWjRoixDnh8/fiw6deokjIyMhKWlpRg0aJC4cuWK2tDzJ0+eCB8fH+Hk5CSMjY2Fubm5qFu3rti0aVOOMl25ckV06NBBWFhYCAMDA1G5cmUxceJEtX0uXLggmjdvLkxMTISRkZFo3LixOHnyZJbnOn/+vKhbt67Q09MTZcqUEXPmzHnv0PPWrVtnefy7Q6KFEOKvv/4S5cqVE7q6uh8dhu7t7S2MjY3FrVu3hJeXlzAyMhI2NjZi8uTJasOj3zZ06FABQKxbt+79v6R3ABA+Pj5izZo1omLFikJfX1+4ubllyfZmOPiboc1vS0tLE/7+/sLR0VEULVpU2NvbiwkTJojk5GS1/d78rvbt2ydcXV2Fvr6+cHJyEps3b87ynDn9/b/7e16yZInw8PAQxYsXF/r6+qJ8+fJizJgxIj4+Xtonu+cR4vVQcycnJ1G0aFFhY2MjhgwZIp4/f662T6NGjbId2u7t7S0cHByytL/r+vXrwsPDQxgaGgoAWYahp6SkCEtLS2Fubi5evXqV7evk5n2xdOlSUatWLWFoaChMTU2Fi4uLGDt2rHj48OFHsxKphCiknodERDk0cuRI/P3334iJiYGRkVGOHqNSqeDj46N2WaSglC1bFtWqVeO6aR+Qnp4OOzs7tGnTBn///XeW7b1798aWLVuk/nBEBYl9dohIoyQnJ2PNmjXo1KlTjgsd0jzbt2/H48eP0atXL7mjELHPDhFphtjYWBw8eBBbtmzB06dPMWLECLkjUR6cPn0aly5dwrRp0+Dm5oZGjRrJHYmIxQ4RaYawsDD06NED1tbWmD9/PmrUqCF3JMqDRYsWYc2aNahRo4bawrhEcmKfHSIiIlI09tkhIiIiRWOxQ0RERIrGPjt4PWX5w4cPYWpqmq/r4hAREVHBEULgxYsXsLOzg47O+8/fsNjB62nf36wlRERERNol
KioKpUuXfu92FjuANPV6VFSUtCwAERERabaEhATY29u/dwmVN1js4P+v3mtmZsZih4iISMt8rAsKOygTERGRorHYISIiIkVjsUNERESKxj47RESUYxkZGUhLS5M7Bn0mihYtCl1d3U9+HhY7RET0UUIIxMTEIC4uTu4o9JmxsLCAra3tJ82Dx2KHiIg+6k2hY21tDSMjI07ASgVOCIGkpCTExsYCAEqWLJnn52KxQ0REH5SRkSEVOsWLF5c7Dn1GDA0NAQCxsbGwtrbO8yUtdlAmIqIPetNHx8jISOYk9Dl68777lL5iLHaIiChHeOmK5JAf7zsWO0RERKRoLHaIiIjyQe/evdG+ffsc7evp6Ynvv/++QPPQ/8cOykRElGczQp4U2muNdyuR68d4enqiRo0amDt3boE+hnIvICAA33//faFMZ8AzO0RERKRoLHaIiEiRevfujaCgIMybNw8qlQoqlQp3795FUFAQ6tSpA319fZQsWRLjx49Henr6Bx+TkZGBfv36wdHREYaGhqhcuTLmzZv3SfnS09MxbNgwmJubo0SJEpg4cSKEENJ2lUqF7du3qz3GwsICAQEBAIDU1FQMGzYMJUuWhIGBARwcHDB9+vQcvXZcXBwGDRoEGxsbGBgYoFq1ati5c6e0fevWrahatSr09fVRtmxZzJ49W+3xH8t29+5dqFQq/PPPP2jcuDGMjIxQvXp1BAcHAwCOHDmCPn36ID4+Xvo9T5kyJUfZ84KXsYiISJHmzZuHmzdvolq1apg6dSqA13MGtWrVCr1798aqVatw/fp1DBgwAAYGBpgyZUq2j7GyskJmZiZKly6NzZs3o3jx4jh58iQGDhyIkiVL4ttvv81TvpUrV6Jfv344c+YMzp07h4EDB6JMmTIYMGBAjh4/f/58/Pfff9i0aRPKlCmDqKgoREVFffRxmZmZaNmyJV68eIE1a9agfPnyCAsLk+awOX/+PL799ltMmTIFXbp0wcmTJzF06FAUL14cvXv3ztUx/vjjj/jtt99QsWJF/Pjjj+jWrRsiIiJQv359zJ07F5MmTcKNGzcAACYmJrl67txgsZNHhXGdOi/Xp4mI6DVzc3Po6enByMgItra2AF5/+Nrb22PBggVQqVRwcnLCw4cPMW7cOEyaNCnbxwCArq4u/P39pfuOjo4IDg7Gpk2b8lzs2Nvb4/fff4dKpULlypVx+fJl/P777zkudiIjI1GxYkU0bNgQKpUKDg4OOXrcwYMHcebMGVy7dg2VKlUCAJQrV07aPmfOHDRt2hQTJ04EAFSqVAlhYWH43//+l+tiZ/To0WjdujUAwN/fH1WrVkVERAScnJxgbm4OlUql9nsuKLyMRUREn41r167B3d1dbe6WBg0aIDExEffv3//gYxcuXIhatWrBysoKJiYmWLp0KSIjI/OcpV69emo53N3dER4ejoyMjBw9vnfv3rh48SIqV66M4cOHY//+/Tl63MWLF1G6dGmp0HnXtWvX0KBBA7W2Bg0a5CrbG66urtLPb5Z7eLP8Q2FisUNERPQRGzZswOjRo9GvXz/s378fFy9eRJ8+fZCamlpgr6lSqdT68ADqswjXrFkTd+7cwbRp0/Dq1St8++23+Oabbz76vG+WYCjIbG8ULVpU7THA68tohY2XsYiISLH09PTUzkZUqVIFW7duhRBC+vA9ceIETE1NUbp06Wwf82af+vXrY+jQoVLbrVu3Pinb6dOn1e6fOnUKFStWlPrOWFlZITo6WtoeHh6OpKQktceYmZmhS5cu6NKlC7755hu0aNECz549Q7Fixd77uq6urrh//z5u3ryZ7dmdKlWq4MSJE2ptJ06cQKVKlXKV7WOy+z0XFJ7ZISIixSpbtixOnz6Nu3fv4smTJxg6dCiioqLg6+uL69ev499//8XkyZPh5+cHHR2dbB+TmZmJihUr4ty5c9i3bx9u3ryJiRMn4uzZs5+ULTIyEn5+frhx4wbWr1+PP/74AyNGjJC2
N2nSBAsWLEBISAjOnTuHwYMHq50pmTNnDtavX4/r16/j5s2b2Lx5M2xtbWFhYfHB123UqBE8PDzQqVMnHDhwAHfu3MGePXuwd+9eAMCoUaMQGBiIadOm4ebNm1i5ciUWLFiA0aNH5zhbTpQtWxaJiYkIDAzEkydPcl0s5QaLHSIiUqzRo0dDV1cXzs7OsLKyQlpaGnbv3o0zZ86gevXqGDx4MPr164effvrpvY+JjIzEoEGD0LFjR3Tp0gV169bF06dP1c7y5EWvXr3w6tUr1KlTBz4+PhgxYgQGDhwobZ89ezbs7e3x5Zdfonv37hg9erTaYqympqaYNWsWateujS+++AJ3797F7t27paLtQ7Zu3YovvvgC3bp1g7OzM8aOHSudZalZsyY2bdqEDRs2oFq1apg0aRKmTp2q1jn5Y9lyon79+hg8eDC6dOkCKysrzJo1K1ePzw2VePei22coISEB5ubmiI+Ph5mZWY4ew9FYRPS5SE5Oxp07d+Do6AgDAwO549Bn5kPvv5x+fvPMDhERESkaix0iIqJ8FBkZCRMTk/fePmW4ek6sXbv2va9dtWrVAn1tTcXRWERERPnIzs4OFy9e/OD2gtS2bVvUrVs322257USsFCx2iIiI8lGRIkVQoUIF2V7f1NQUpqamsr2+JuJlLCIiIlI0FjtERJQjcsx8S5Qf7ztexiIiog/S09ODjo4OHj58CCsrK+jp6amt6URUEIQQSE1NxePHj6GjowM9Pb08PxeLHSIi+iAdHR04OjoiOjoaDx8+lDsOfWaMjIxQpkyZHE2W+D4sdoiI6KP09PRQpkwZpKenF9p6RkS6urooUqTIJ59JZLFDREQ5olKpULRo0c92+DJpL3ZQJiIiIkVjsUNERESKxmKHiIiIFI3FDhERESkaix0iIiJSNBY7REREpGiyFjuLFi2Cq6srzMzMYGZmBnd3d+zZs0fanpycDB8fHxQvXhwmJibo1KkTHj16pPYckZGRaN26NYyMjGBtbY0xY8YgPT29sA+FiIiINJSsxU7p0qUxY8YMnD9/HufOnUOTJk3Qrl07XL16FQAwcuRI7NixA5s3b0ZQUBAePnyIjh07So/PyMhA69atkZqaipMnT2LlypUICAjApEmT5DokIiIi0jAqIYSQO8TbihUrhv/973/45ptvYGVlhXXr1uGbb74BAFy/fh1VqlRBcHAw6tWrhz179uDrr7/Gw4cPYWNjAwBYvHgxxo0bh8ePH+d4HY2EhASYm5sjPj4eZmZmOXrMjJAneTvAXBjvVqLAX4OIiEhb5fTzW2P67GRkZGDDhg14+fIl3N3dcf78eaSlpaFZs2bSPk5OTihTpgyCg4MBAMHBwXBxcZEKHQBo3rw5EhISpLND2UlJSUFCQoLajYiIiJRJ9mLn8uXLMDExgb6+PgYPHoxt27bB2dkZMTEx0NPTg4WFhdr+NjY2iImJAQDExMSoFTpvtr/Z9j7Tp0+Hubm5dLO3t8/fgyIiIiKNIXuxU7lyZVy8eBGnT5/GkCFD4O3tjbCwsAJ9zQkTJiA+Pl66RUVFFejrERERkXxkXwhUT08PFSpUAADUqlULZ8+exbx589ClSxekpqYiLi5O7ezOo0ePYGtrCwCwtbXFmTNn1J7vzWitN/tkR19fH/r6+vl8JERERKSJZD+z867MzEykpKSgVq1aKFq0KAIDA6VtN27cQGRkJNzd3QEA7u7uuHz5MmJjY6V9Dhw4ADMzMzg7Oxd6diIiItI8sp7ZmTBhAlq2bIkyZcrgxYsXWLduHY4cOYJ9+/bB3Nwc/fr1g5+fH4oVKwYzMzP4+vrC3d0d9erVAwB4eXnB2dkZPXv2xKxZsxATE4OffvoJPj4+PHNDREREAGQudmJjY9GrVy9ER0fD3Nwcrq6u2LdvH7766isAwO+//w4dHR106tQJKSkpaN68Of7880/p8bq6uti5cyeGDBkCd3d3GBsbw9vbG1OnTpXrkIiIiEjDaNw8
O3LgPDtERETaR+vm2SEiIiIqCCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGiyVrsTJ8+HV988QVMTU1hbW2N9u3b48aNG2r7eHp6QqVSqd0GDx6stk9kZCRat24NIyMjWFtbY8yYMUhPTy/MQyEiIiINVUTOFw8KCoKPjw+++OILpKen44cffoCXlxfCwsJgbGws7TdgwABMnTpVum9kZCT9nJGRgdatW8PW1hYnT55EdHQ0evXqhaJFi+LXX38t1OMhIiIizSNrsbN37161+wEBAbC2tsb58+fh4eEhtRsZGcHW1jbb59i/fz/CwsJw8OBB2NjYoEaNGpg2bRrGjRuHKVOmQE9Pr0CPQdvNCHlS4K8x3q1Egb8GERHR+2hUn534+HgAQLFixdTa165dixIlSqBatWqYMGECkpKSpG3BwcFwcXGBjY2N1Na8eXMkJCTg6tWr2b5OSkoKEhIS1G5ERESkTLKe2XlbZmYmvv/+ezRo0ADVqlWT2rt37w4HBwfY2dnh0qVLGDduHG7cuIF//vkHABATE6NW6ACQ7sfExGT7WtOnT4e/v38BHQkRERFpEo0pdnx8fHDlyhUcP35crX3gwIHSzy4uLihZsiSaNm2KW7duoXz58nl6rQkTJsDPz0+6n5CQAHt7+7wFJyIiIo2mEZexhg0bhp07d+Lw4cMoXbr0B/etW7cuACAiIgIAYGtri0ePHqnt8+b++/r56Ovrw8zMTO1GREREyiRrsSOEwLBhw7Bt2zYcOnQIjo6OH33MxYsXAQAlS5YEALi7u+Py5cuIjY2V9jlw4ADMzMzg7OxcILmJiIhIe8h6GcvHxwfr1q3Dv//+C1NTU6mPjbm5OQwNDXHr1i2sW7cOrVq1QvHixXHp0iWMHDkSHh4ecHV1BQB4eXnB2dkZPXv2xKxZsxATE4OffvoJPj4+0NfXl/PwiIiISAPIemZn0aJFiI+Ph6enJ0qWLCndNm7cCADQ09PDwYMH4eXlBScnJ4waNQqdOnXCjh07pOfQ1dXFzp07oaurC3d3d3z33Xfo1auX2rw8RERE9PmS9cyOEOKD2+3t7REUFPTR53FwcMDu3bvzKxYREREpiEZ0UCYiIiIqKCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNHyVOxMnToVSUlJWdpfvXqFqVOnfnIoIiIiovySp2LH398fiYmJWdqTkpLg7+//yaGIiIiI8kueih0hBFQqVZb20NBQFCtW7JNDEREREeWXIrnZ2dLSEiqVCiqVCpUqVVIreDIyMpCYmIjBgwfne0giIiKivMpVsTN37lwIIdC3b1/4+/vD3Nxc2qanp4eyZcvC3d0930MSERER5VWuih1vb28AgKOjI+rXr4+iRYsWSCgiIiKi/JKrYueNRo0aITMzEzdv3kRsbCwyMzPVtnt4eORL
OCIiIqJPladi59SpU+jevTvu3bsHIYTaNpVKhYyMjHwJR0RERPSp8lTsDB48GLVr18auXbtQsmTJbEdmEREREWmCPBU74eHh2LJlCypUqJDfeYiIiIjyVZ7m2albty4iIiLyOwsRERFRvsvTmR1fX1+MGjUKMTExcHFxyTIqy9XVNV/CEREREX2qPBU7nTp1AgD07dtXalOpVNLMyuygTERERJoiT8XOnTt38jsHERERUYHIU7Hj4OCQ3zmIiIiICkSeip1Vq1Z9cHuvXr3yFIaIiIgov+Wp2BkxYoTa/bS0NCQlJUFPTw9GRkYsdoiIiEhj5Gno+fPnz9VuiYmJuHHjBho2bIj169fn+HmmT5+OL774AqamprC2tkb79u1x48YNtX2Sk5Ph4+OD4sWLw8TEBJ06dcKjR4/U9omMjETr1q1hZGQEa2trjBkzBunp6Xk5NCIiIlKYPBU72alYsSJmzJiR5azPhwQFBcHHxwenTp3CgQMHkJaWBi8vL7x8+VLaZ+TIkdixYwc2b96MoKAgPHz4EB07dpS2Z2RkoHXr1khNTcXJkyexcuVKBAQEYNKkSfl1aERERKTF8nQZ671PVqQIHj58mOP99+7dq3Y/ICAA1tbWOH/+PDw8PBAfH4+///4b69atQ5MmTQAAK1asQJUqVXDq1CnUq1cP+/fvR1hYGA4ePAgbGxvUqFED06ZNw7hx4zBlyhTo6enl5yESERGRlslTsfPff/+p3RdCIDo6GgsWLECDBg3yHCY+Ph4AUKxYMQDA+fPnkZaWhmbNmkn7ODk5oUyZMggODka9evUQHBwMFxcX2NjYSPs0b94cQ4YMwdWrV+Hm5pbldVJSUpCSkiLdT0hIyHNmIiIi0mx5Knbat2+vdl+lUsHKygpNmjTB7Nmz8xQkMzMT33//PRo0aIBq1aoBAGJiYqCnpwcLCwu1fW1sbBATEyPt83ah82b7m23ZmT59Ovz9/fOUk4iIiLRLnoqdzMzM/M4BHx8fXLlyBcePH8/3537XhAkT4OfnJ91PSEiAvb19gb8uERERFb5P7rMjhADw+uxOXg0bNgw7d+7E0aNHUbp0aand1tYWqampiIuLUzu78+jRI9ja2kr7nDlzRu353ozWerPPu/T19aGvr5/nvERERKQ98jwaa9WqVXBxcYGhoSEMDQ3h6uqK1atX5+o5hBAYNmwYtm3bhkOHDsHR0VFte61atVC0aFEEBgZKbTdu3EBkZCTc3d0BAO7u7rh8+TJiY2OlfQ4cOAAzMzM4Ozvn9fCIiIhIIfJ0ZmfOnDmYOHEihg0bJnVIPn78OAYPHownT55g5MiROXoeHx8frFu3Dv/++y9MTU2lPjbm5uYwNDSEubk5+vXrBz8/PxQrVgxmZmbw9fWFu7s76tWrBwDw8vKCs7MzevbsiVmzZiEmJgY//fQTfHx8ePaGiIiI8lbs/PHHH1i0aJHaTMlt27ZF1apVMWXKlBwXO4sWLQIAeHp6qrWvWLECvXv3BgD8/vvv0NHRQadOnZCSkoLmzZvjzz//lPbV1dXFzp07MWTIELi7u8PY2Bje3t6YOnVqXg6NiIiIFCZPxU50dDTq16+fpb1+/fqIjo7O8fO86e/zIQYGBli4cCEWLlz43n0cHBywe/fuHL8uERERfT7y1GenQoUK2LRpU5b2jRs3omLFip8cioiIiCi/5OnMjr+/P7p06YKjR49KfXZOnDiBwMDAbIsgIiIiIrnk6cxOp06dcPr0aZQoUQLbt2/H9u3bUaJECZw5cwYdOnTI74xEREREeZbneXZq1aqFNWvW5GcWIiIionyXpzM7u3fvxr59+7K079u3D3v27PnkUERERET5JU/Fzvjx45GRkZGlXQiB8ePHf3IoIiIiovySp2InPDw829mJnZycEBER8cmhiIiIiPJLnoodc3Nz3L59O0t7REQEjI2NPzkUERERUX7JU7HTrl07fP/997h165bUFhERgVGjRqFt27b5Fo6I
iIjoU+Wp2Jk1axaMjY3h5OQER0dHODo6okqVKihevDh+++23/M5IRERElGd5Gnpubm6OkydP4sCBAwgNDZVWPffw8MjvfERERESfJM/z7KhUKnh5ecHLy+u9+7i4uGD37t2wt7fP68sQERERfZI8XcbKqbt37yItLa0gX4KIiIjogwq02CEiIiKSG4sdIiIiUjQWO0RERKRoLHaIiIhI0VjsEBERkaLlW7ETFxeXpW3JkiWwsbHJr5cgIiIiyrU8FTszZ87Exo0bpfvffvstihcvjlKlSiE0NFRq7969O9fKIiIiIlnlqdhZvHixNFHggQMHcODAAezZswctW7bEmDFj8jUgERER0afI0wzKMTExUrGzc+dOfPvtt/Dy8kLZsmVRt27dfA1IRERE9CnydGbH0tISUVFRAIC9e/eiWbNmAAAhBDIyMvIvHREREdEnytOZnY4dO6J79+6oWLEinj59ipYtWwIAQkJCUKFChXwNSERERPQp8lTs/P777yhbtiyioqIwa9YsmJiYAACio6MxdOjQfA1IRERE9CnyVOwULVoUo0ePztI+cuTITw5ERERElJ/yVOysWrXqg9t79eqVpzBERERE+S1Pxc6IESPU7qelpSEpKQl6enowMjJisUNEREQaI0+jsZ4/f652S0xMxI0bN9CwYUOsX78+vzMSERER5Vm+LRdRsWJFzJgxI8tZHyIiIiI55etCoEWKFMHDhw/z8ymJiIiIPkme+uz8999/aveFEIiOjsaCBQvQoEGDfAlGRERElB/yVOy0b99e7b5KpYKVlRWaNGmC2bNn50cuIiIionyRp2InMzMzv3MQ5dmMkCcF/hrj3UoU+GsQEVHB+OQ+O0IICCHyIwsRERFRvstzsfP333+jWrVqMDAwgIGBAapVq4Zly5blZzYiIiKiT5any1iTJk3CnDlz4OvrC3d3dwBAcHAwRo4cicjISEydOjVfQxIRERHlVZ6KnUWLFuGvv/5Ct27dpLa2bdvC1dUVvr6+LHaIiIhIY+TpMlZaWhpq166dpb1WrVpIT0//5FBERERE+SVPxU7Pnj2xaNGiLO1Lly5Fjx49PjkUERERUX7J8WUsPz8/6WeVSoVly5Zh//79qFevHgDg9OnTiIyM5CKgREREpFFyXOyEhISo3a9VqxYA4NatWwCAEiVKoESJErh69Wo+xiMiIiL6NDkudg4fPpzrJ79//z7s7Oygo5OvS3ARERER5ViBViHOzs64e/fue7cfPXoUbdq0gZ2dHVQqFbZv3662vXfv3lCpVGq3Fi1aqO3z7Nkz9OjRA2ZmZrCwsEC/fv2QmJhYAEdDRERE2qhAi52Pzaz88uVLVK9eHQsXLnzvPi1atEB0dLR0W79+vdr2Hj164OrVqzhw4AB27tyJo0ePYuDAgfmSn4iIiLRfnubZyS8tW7ZEy5YtP7iPvr4+bG1ts9127do17N27F2fPnpWGwv/xxx9o1aoVfvvtN9jZ2eV7ZiIiItIuGt+Z5siRI7C2tkblypUxZMgQPH36VNoWHBwMCwsLtTl/mjVrBh0dHZw+ffq9z5mSkoKEhAS1GxERESmTRhc7LVq0wKpVqxAYGIiZM2ciKCgILVu2REZGBgAgJiYG1tbWao8pUqQIihUrhpiYmPc+7/Tp02Fubi7d7O3tC/Q4iIiISD4FehlLpVJ90uO7du0q/ezi4gJXV1eUL18eR44cQdOmTfP8vBMmTFCbNyghIYEFDxERkULJ2kE5t8qVK4cSJUogIiICAGBra4vY2Fi1fdLT0/Hs2bP39vMBXvcDMjMzU7sRERGRMhVosRMWFgYHB4d8e7779+/j6dOnKFmyJADA3d0dcXFxOH/+vLTPoUOHkJmZibp16+bb6xIREZH2yvFlrI4dO+b4Sf/55x8A+OilocTEROksDQDcuXMHFy9eRLFixVCsWDH4+/ujU6dOsLW1xa1btzB27FhUqFABzZs3BwBUqVIFLVq0wIABA7B48WKk
paVh2LBh6Nq1K0diEREREYBcFDvm5ub5/uLnzp1D48aNpftv+tF4e3tj0aJFuHTpElauXIm4uDjY2dnBy8sL06ZNg76+vvSYtWvXYtiwYWjatCl0dHTQqVMnzJ8/P9+zEhERkXbKcbGzYsWKfH9xT0/PD/br2bdv30efo1ixYli3bl1+xiKSxYyQJwX+GuPdShT4axARaRqNHnpORERE9KnyPPR8y5Yt2LRpEyIjI5Gamqq27cKFC58cjIi0U0GfoeLZKSLKrTyd2Zk/fz769OkDGxsbhISEoE6dOihevDhu37790eUfiIiIiApTnoqdP//8E0uXLsUff/wBPT09jB07FgcOHMDw4cMRHx+f3xmJiIiI8ixPxU5kZCTq168PADA0NMSLFy8AAD179syyKjkRERGRnPJU7Nja2uLZs2cAgDJlyuDUqVMAXs+Tk9+zJhMRERF9ijwVO02aNMF///0HAOjTpw9GjhyJr776Cl26dEGHDh3yNSARERHRp8jTaKylS5ciMzMTAODj44PixYvj5MmTaNu2LQYNGpSvAYmIiIg+RZ6Knfv376stBdG1a1d07doVQghERUWhTJky+RaQiIiI6FPk6TKWo6MjHj9+nKX92bNncHR0/ORQRERERPklT8WOEAIqlSpLe2JiIgwMDD45FBEREVF+ydVlrDcLdapUKkycOBFGRkbStoyMDJw+fRo1atTI14BEREREnyJXxU5ISAiA12d2Ll++DD09PWmbnp4eqlevjtGjR+dvQiIiIqJPkKti5/DhwwBeDzefN28ezMzMCiQUERERUX7J02isFStWSD/fv38fAFC6dOn8SURERESUj/LUQTkzMxNTp06Fubk5HBwc4ODgAAsLC0ybNk2af4eIiIhIE+TpzM6PP/6Iv//+GzNmzECDBg0AAMePH8eUKVOQnJyMX375JV9DEhEREeVVnoqdlStXYtmyZWjbtq3U5urqilKlSmHo0KEsdoiIiEhj5Oky1rNnz+Dk5JSl3cnJSVoglIiIiEgT5KnYqV69OhYsWJClfcGCBahevfonhyIiIiLKL3m6jDVr1iy0bt0aBw8ehLu7OwAgODgYUVFR2L17d74GJCIqbDNCnhT4a4x3K1Hgr0FEr+V5baybN2+iQ4cOiIuLQ1xcHDp27IgbN27AwcEhvzMSERER5Vmezuw4OjoiOjo6S0fkp0+fwt7eHhkZGfkSjoiIiOhT5Xkh0OxwIVAiIiLSNHleCHTSpElcCJSIiIg0HhcCJSIiIkXjQqBERESkaJ+8ECgRERGRJstTB2UiIiIibcFih4iIiBSNxQ4REREpGosdIiIiUjQWO0RERKRoLHaIiIhI0VjsEBERkaKx2CEiIiJFY7FDREREisZih4iIiBSNxQ4REREpGosdIiIiUjQWO0RERKRoLHaIiIhI0VjsEBERkaLJWuwcPXoUbdq0gZ2dHVQqFbZv3662XQiBSZMmoWTJkjA0NESzZs0QHh6uts+zZ8/Qo0cPmJmZwcLCAv369UNiYmIhHgURERFpMlmLnZcvX6J69epYuHBhtttnzZqF+fPnY/HixTh9+jSMjY3RvHlzJCcnS/v06NEDV69exYEDB7Bz504cPXoUAwcOLKxDICIiIg1XRM4Xb9myJVq2bJntNiEE5s6di59++gnt2rUDAKxatQo2NjbYvn07unbtimvXrmHv3r04e/YsateuDQD4448/0KpVK/z222+ws7MrtGMhItI0M0KeFPhrjHcrUeCvQfSpNLbPzp07dxATE4NmzZpJbebm5qhbty6Cg4MBAMHBwbCwsJAKHQBo1qwZdHR0cPr06fc+d0pKChISEtRuREREpEwaW+zExMQAAGxsbNTabWxspG0xMTGwtrZW216kSBEUK1ZM2ic706dPh7m5uXSzt7fP5/RERESkKTS22ClIEyZMQHx8vHSLioqSOxIREREVEI0tdmxtbQEAjx49Umt/9OiRtM3W1haxsbFq29PT0/Hs
2TNpn+zo6+vDzMxM7UZERETKpLHFjqOjI2xtbREYGCi1JSQk4PTp03B3dwcAuLu7Iy4uDufPn5f2OXToEDIzM1G3bt1Cz0xERESaR9bRWImJiYiIiJDu37lzBxcvXkSxYsVQpkwZfP/99/j5559RsWJFODo6YuLEibCzs0P79u0BAFWqVEGLFi0wYMAALF68GGlpaRg2bBi6du3KkVhEREQEQOZi59y5c2jcuLF038/PDwDg7e2NgIAAjB07Fi9fvsTAgQMRFxeHhg0bYu/evTAwMJAes3btWgwbNgxNmzaFjo4OOnXqhPnz5xf6sRAREZFmkrXY8fT0hBDivdtVKhWmTp2KqVOnvnefYsWKYd26dQURj4iIiBRAY/vsEBEREeUHFjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNFY7BAREZGisdghIiIiRWOxQ0RERIrGYoeIiIgUjcUOERERKRqLHSIiIlI0FjtERESkaCx2iIiISNE0vtiZMmUKVCqV2s3JyUnanpycDB8fHxQvXhwmJibo1KkTHj16JGNiIiIi0iQaX+wAQNWqVREdHS3djh8/Lm0bOXIkduzYgc2bNyMoKAgPHz5Ex44dZUxLREREmqSI3AFyokiRIrC1tc3SHh8fj7///hvr1q1DkyZNAAArVqxAlSpVcOrUKdSrV6+woxIREZGG0YozO+Hh4bCzs0O5cuXQo0cPREZGAgDOnz+PtLQ0NGvWTNrXyckJZcqUQXBwsFxxiYiISINo/JmdunXrIiAgAJUrV0Z0dDT8/f3x5Zdf4sqVK4iJiYGenh4sLCzUHmNjY4OYmJj3PmdKSgpSUlKk+wkJCQUVn4iIiGSm8cVOy5YtpZ9dXV1Rt25dODg4YNOmTTA0NMzTc06fPh3+/v75FZGIiIg0mFZcxnqbhYUFKlWqhIiICNja2iI1NRVxcXFq+zx69CjbPj5vTJgwAfHx8dItKiqqgFMTERGRXLSu2ElMTMStW7dQsmRJ1KpVC0WLFkVgYKC0/caNG4iMjIS7u/t7n0NfXx9mZmZqNyIiIlImjb+MNXr0aLRp0wYODg54+PAhJk+eDF1dXXTr1g3m5ubo168f/Pz8UKxYMZiZmcHX1xfu7u4ciUVEREQAtKDYuX//Prp164anT5/CysoKDRs2xKlTp2BlZQUA+P3336Gjo4NOnTohJSUFzZs3x59//ilzaiIiItIUGl/sbNiw4YPbDQwMsHDhQixcuLCQEhEREZE20fhih4iIPl8zQp4U+GuMdytR4K+hlOPQVlrXQZmIiIgoN1jsEBERkaKx2CEiIiJFY7FDREREisZih4iIiBSNxQ4REREpGosdIiIiUjQWO0RERKRoLHaIiIhI0VjsEBERkaKx2CEiIiJFY7FDREREisZih4iIiBSNxQ4REREpGosdIiIiUjQWO0RERKRoLHaIiIhI0VjsEBERkaKx2CEiIiJFY7FDREREisZih4iIiBSNxQ4REREpGosdIiIiUjQWO0RERKRoLHaIiIhI0VjsEBERkaKx2CEiIiJFY7FDREREisZih4iIiBSNxQ4REREpGosdIiIiUjQWO0RERKRoLHaIiIhI0VjsEBERkaKx2CEiIiJFY7FDREREisZih4iIiBStiNwBiIiISDvMCHlSoM8/3q1EgTwvz+wQERGRorHYISIiIkVjsUNERESKxmKHiIiIFE0xxc7C
hQtRtmxZGBgYoG7dujhz5ozckYiIiEgDKKLY2bhxI/z8/DB58mRcuHAB1atXR/PmzREbGyt3NCIiIpKZIoqdOXPmYMCAAejTpw+cnZ2xePFiGBkZYfny5XJHIyIiIplpfbGTmpqK8+fPo1mzZlKbjo4OmjVrhuDgYBmTERERkSbQ+kkFnzx5goyMDNjY2Ki129jY4Pr169k+JiUlBSkpKdL9+Ph4AEBCQkKOXzc58UUe0uZOQoJegb+GEo5DCccA8DhySgnHAPA4ckoJxwDwOHIqt8fw5nNbCPHhHYWWe/DggQAgTp48qdY+ZswYUadOnWwfM3nyZAGAN95444033nhTwC0qKuqDtYLWn9kpUaIEdHV18ejRI7X2R48ewdbWNtvHTJgwAX5+ftL9zMxMPHv2DMWLF4dKpSqQnAkJCbC3t0dUVBTMzMwK5DUKmhKOAVDGcSjhGAAehyZRwjEAyjgOJRwDUDjHIYTAixcvYGdn98H9tL7Y0dPTQ61atRAYGIj27dsDeF28BAYGYtiwYdk+Rl9fH/r6+mptFhYWBZz0NTMzM61+8wLKOAZAGcehhGMAeByaRAnHACjjOJRwDEDBH4e5uflH99H6YgcA/Pz84O3tjdq1a6NOnTqYO3cuXr58iT59+sgdjYiIiGSmiGKnS5cuePz4MSZNmoSYmBjUqFEDe/fuzdJpmYiIiD4/iih2AGDYsGHvvWylCfT19TF58uQsl8+0iRKOAVDGcSjhGAAehyZRwjEAyjgOJRwDoFnHoRLiY+O1iIiIiLSX1k8qSERERPQhLHaIiIhI0VjsEBERkaKx2CEiIiJFU8xoLE2Qm7W1lDBRFBER5a+UlBSNGL2kNByNlY90dHRyvNxERkZGAachpYiLi8O2bdtw7Ngx3Lt3D0lJSbCysoKbmxuaN2+O+vXryx0xR5RyHNeuXcOGDRveexydOnXih1UhyczMRFBQULb/Fs2aNYO9vb3cET9qz5490vspKioKmZmZMDY2hpubG7y8vNCnT5+PLoUgt0uXLuV4X1dX1wJM8n4sdvJRUFCQ9PPdu3cxfvx49O7dG+7u7gCA4OBgrFy5EtOnT4e3t7dcMXMkLi4O69evx5AhQwAAPXr0wKtXr6Tturq6+OuvvwptmY3P0cOHDzFp0iSsXbsWdnZ2qFOnDuzs7GBoaIhnz57hypUrOH/+PBwcHDB58mR06dJF7sjZUspxXLhwAWPHjsXx48fRoEGDbI/j2LFjSEhIwNixY/H9999rZdGTmpqK1NRUmJiYyB3lvV69eoXZs2dj0aJFePbsGWrUqJHl3+Lhw4fw8vLCpEmTUK9ePbkjZ7Ft2zaMGzcOL168QKtWrd77fgoODkbv3r0xbdo0WFlZyR07W2++6L+vnHizTaVSyfdF/1NXHafsNWnSRKxbty5L+9q1a0WjRo0KP1AuzZo1S3Tv3l26b2JiIjp16iR69+4tevfuLSpXriwmT54sX8BcOHTokPjtt9/E8ePHhRBCLF68WNjb24sSJUqI/v37i6SkJJkTZs/a2lqMGTNGXL169b37JCUliXXr1ol69eqJ//3vf4WYLueUchxly5YVCxcuFM+fP//gfidPnhRdunQRv/zyS+EE+wTLly8Xw4YNE2vWrBFCCDF+/Hihp6cndHR0RLNmzcSTJ09kTpi90qVLi86dO4tdu3aJ1NTUbPe5e/eu+PXXX4WDg4NYunRpISf8uHr16omdO3eKjIyMD+53//59MW7cODFnzpxCSpZ7d+/ezfFNLix2CoihoaG4efNmlvYbN24IQ0NDGRLlTp06dcSBAwek+yYmJuLWrVvS/X/++UfUqFFDjmi5snTpUqGrqysqVKgg9PX1xa+//iqMjY3F4MGDxdChQ4WZmZkYN26c3DGzldsPGk39YFLKcbzvQzW/9i9sP//8szA0NBTNmjUTxYoVE4MHDxa2trZixowZYtasWaJ06dJi8ODBcsfMVlhYWI73TU1NFREREQWY
hrQBi50CUqlSJTFmzJgs7WPGjBGVKlWSIVHulChRQkRGRkr3a9WqJaKioqT7t27dEsbGxnJEy5WqVauK+fPnCyGE2LNnjyhSpIgICAiQtm/atEmUL19ernhEsqlQoYJ09vns2bNCR0dHbNmyRdq+e/duUaZMGbnifZZSUlLE9evXRVpamtxRPklERIQYNmyYaNq0qWjatKnw9fWVveBkn50Csnv3bnTq1AkVKlRA3bp1AQBnzpxBeHg4tm7dilatWsmc8MOMjIxw5swZVKtWLdvtly9fRt26dZGUlFTIyXLHyMgI165dg4ODAwBAT08PoaGhqFKlCgAgMjISFStWREpKipwxs/Xff//leN+2bdsWYJJPo5TjeFdgYCACAwMRGxuLzMxMtW3Lly+XKVXO6evrIyIiQurEq6+vj0uXLqFy5coAgAcPHsDR0RGpqalyxsyWNnSIzY2kpCT4+vpi5cqVAICbN2+iXLly8PX1RalSpTB+/HiZE+bcvn370LZtW9SoUQMNGjQAAJw4cQKhoaHYsWMHvvrqK1lyceh5AWnVqhVu3ryJRYsW4fr16wCANm3aYPDgwVoxQqBcuXK4cOHCe4udc+fOwdHRsZBT5V5ycjIMDQ2l+/r6+mqdRvX19ZGeni5HtI9q37692v13OwC+PfJPk0f3KeU43ubv74+pU6eidu3aKFmyZI5HYWqStLQ0tf8Lenp6KFq0qHS/SJEiGvvvUaNGDbVOrx+iqcfwtgkTJiA0NBRHjhxBixYtpPZmzZphypQpWlXsjB8/HiNHjsSMGTOytI8bN062YoeXsShbP/30k7C3txcxMTFZtkVHRwt7e3vx448/ypAsd3R0dERERISIj48XcXFxwtTUVISGhor4+HgRHx8vbt68KXR0dOSO+VEHDhwQNWvWFHv37pWy7927V9SuXVvs379f7ng5ppTjsLW1FatWrZI7xidRqVTi8OHDIjQ0VISGhgpjY2Oxa9cu6X5gYKDG/t94u8Prtm3bRPny5cXixYul7IsXLxYVK1YU27ZtkztqjpQpU0YEBwcLIdT7R4aHhwtTU1M5o+Wavr7+e/ur6uvry5DoNRY7Bejo0aOiR48ewt3dXdy/f18IIcSqVavEsWPHZE72cQkJCaJKlSrC1NRUDB06VMydO1fMnTtXDBkyRJiamgonJyeRkJAgd8yPUqlUQkdHR7q9776mq1q1arbvm6NHjwonJycZEuWNUo6jWLFisvdB+FRv3vsqlSrLTZv+b3zxxRdi165dWdp37dolatasKUOi3DM0NJQKnLeLnYsXLwozMzM5o+Va6dKlxaZNm7K0b9y4Udjb28uQ6DVexiogW7duRc+ePdGjRw9cuHBB6hMSHx+PX3/9Fbt375Y54YeZmprixIkTmDBhAtavX4+4uDgAgIWFBbp3745ff/0Vpqam8obMgcOHD8sdIV/cunUr2zmNzM3Ncffu3ULPk1dKOY7+/ftj3bp1mDhxotxR8uzOnTtyR8gXly9fzvaSuqOjI8LCwmRIlHu1a9fGrl274OvrC+D/X9pdtmyZNE+bthgwYAAGDhyI27dvSxOFnjhxAjNnzoSfn59sudhBuYC4ublh5MiR6NWrF0xNTREaGopy5cohJCQELVu2RExMjNwRc0wIgcePHwMArKystLJ/grbz8PCAgYEBVq9eDRsbGwDAo0eP0KtXLyQnJ6tNaKnJlHIcI0aMwKpVq+Dq6gpXV1e1vi4AMGfOHJmSfX5q1qyJatWqYdmyZdDT0wPwemLE/v3748qVK7hw4YLMCT/u+PHjaNmyJb777jsEBARg0KBBCAsLw8mTJxEUFIRatWrJHTHHhBCYO3cuZs+ejYcPHwIA7OzsMGbMGAwfPly2zw8WOwXEyMgIYWFhKFu2rFqxc/v2bTg7OyM5OVnuiJ+FTZs2oX379tIfwfv378POzg46Oq/XwE1KSsKCBQswduxYOWN+VEREBDp06ICbN29KHdyjoqJQsWJF
bN++HRUqVJA5Yc4o5TgaN2783m0qlQqHDh0qxDR5M2vWLPj6+kod+E+cOIHatWtLnZZfvHiBcePG4c8//5Qz5kedOXMGbdq0gRBCGnl16dIlqFQq7NixA3Xq1JE5Yc7cunULM2bMQGhoKBITE1GzZk2MGzcOLi4uckfLsxcvXgCARlwFYLFTQMqVK4elS5eiWbNmasXOqlWrMGPGDI0/vdq4ceOPVuAqlQqBgYGFlChvdHV1ER0dDWtrawCvF2C9ePEiypUrB+D1WQU7OzutGLEhhMCBAwek0X1VqlRBs2bNtO5Mm1KOQ9sp6f/Gy5cvsXbtWrX3VPfu3WFsbCxzMtIU7LNTQAYMGIARI0Zg+fLlUKlUePjwIYKDgzF69GituM5fo0aN92578eIF1q1bp5Fz07zr3Vpem2t7lUoFLy8veHl5yR3lk7w5Dg8PD+jr62t9kXP//n0AQOnSpWVOkjtK+r9hbGyMgQMHyh3jk2RmZiIiIiLbeZs8PDxkSpV7jx49wujRo6U5qN59X8lVPLPYKSDjx49HZmYmmjZtiqSkJOkP++jRo6VOaJrs999/z9KWnp6OhQsX4pdffkGpUqUwbdo0GZJ9PubPn4+BAwfCwMAA8+fP/+C+w4cPL6RUnyYzMxO//PILFi9ejEePHkmTp02cOBFly5ZFv3795I6YI5mZmfj5558xe/ZsJCYmAnh9qn7UqFH48ccfpcukVDhWr16NJUuW4Pbt2wgODoaDgwN+//13lCtXDu3atZM73kedOnUK3bt3x71797IUB7IunpkHvXv3RmRkJCZOnKhZc1AV+vivz0xKSoq4evWqOH36tHjx4oXccfJszZo1oly5cqJkyZJi4cKFWjOduUqlEo8ePZLuv7vGV0xMjMYOry1btqy0TlTZsmXfe3N0dJQ5ac75+/uLcuXKiTVr1qgNt92wYYOoV6+ezOlybvz48cLKykr8+eef0twuCxcuFFZWVuKHH36QO16OaPP/jbf9+eefokSJEuLnn38WBgYG0jGsWLFCeHp6ypwuZ6pXry46d+4swsLCxPPnz0VcXJzaTZuYmJiIkJAQuWNkwTM7BUxPTw/Ozs5yx8izvXv3Yvz48bhz5w5Gjx4NPz8/rbsOvm/fPpibmwN4/Y08MDAQV65cAQBpSL0mentosFKGCa9atQpLly5F06ZNMXjwYKm9evXqUn8LbbBy5UosW7ZMbXkLV1dXlCpVCkOHDsUvv/wiY7qcW7ZsGUxMTAC8PnMbEBCAEiVKAPj/nUs13R9//IG//voL7du3V5u1t3bt2hg9erSMyXIuPDwcW7Zs0ZoO+h9ib2+vkZdEWezko44dOyIgIABmZmbo2LHjB/f9559/CilV3pw5cwbjxo3DqVOnMHjwYBw8eFD6I6htvL291e4PGjRI7b7GnGZ9j7S0NDg5OWHnzp3Sml7a6sGDB9n+Qc/MzERaWpoMifLm2bNncHJyytLu5OSEZ8+eyZAo98qUKYO//vpLum9ra4vVq1dn2UfT3blzB25ublna9fX18fLlSxkS5V7dunURERGhiGJn7ty5GD9+PJYsWYKyZcvKHUfCYicfmZubSx+cZmZmGv8h+iH16tWDoaEhBg8eDEdHR6xbty7b/TS9r8i7Hf20UdGiRRUzVYGzszOOHTsmLcz6xpYtW7L9wNJU1atXx4IFC7L0pVqwYAGqV68uU6rc0aZJHD/E0dERFy9ezPKe2rt3r9Z8OfD19cWoUaMQExMDFxeXLPM2acNipm906dIFSUlJKF++PIyMjLIci1xfBljs5KMOHTrAwMAAABAQECBvmE9UpkwZqFQqbN++/b37qFQqjS923nj69CmKFy8O4PW8Ln/99ReSk5PRpk0bfPnllzKn+zgfHx/MnDkTy5YtQ5Ei2vvfdtKkSfD29saDBw+QmZmJf/75Bzdu3MCqVauwc+dOuePl2KxZs9C6dWscPHhQmuE2ODgYUVFRGj87utL4+fnBx8cHycnJ
EELgzJkzWL9+PaZPn45ly5bJHS9HOnXqBADo27ev1Pb2Qqfa1EF57ty5ckfIFufZyUe6urqIiYmBlZVVljksSB6XL19GmzZtpInrNmzYgBYtWuDly5fQ0dHBy5cvsWXLliwrc2uaDh06IDAwECYmJnBxccnSb0rTL4u+7dixY5g6dara5GmTJk3SuiH1Dx8+xMKFC9Xmdhk6dCjs7OxkTpYzhw4dwrBhw3Dq1CmYmZmpbYuPj0f9+vWxaNEirRj2vHbtWkyZMgW3bt0C8HrGXn9/f60Z3Xfv3r0Pbn/3rBXlHoudfGRra4u//voLbdq0gY6ODh49egQrKyu5Y+VJTv4QLl68WOPPirRs2RJFihTB+PHjsXr1auzcuRPNmzeX+ir4+vri/PnzOHXqlMxJP6xPnz4f3L5ixYpCSkJK0bZtWzRu3BgjR47Mdvv8+fNx+PBhbNu2rZCT5V1SUhISExP5JVNDJCcnIzU1Va3t3c+TwsJiJx9NmTIFU6dOzVFfHU0/LamUP4QlSpTAoUOH4OrqisTERJiZmeHs2bPSWjPXr19HvXr1NHpUFmmGS5cuoVq1atDR0cGlS5c+uK829LFwcHD4YL+W69evw8vLC5GRkYWc7PMVFhaGyMjILAXC26P+NN3Lly8xbtw4bNq0CU+fPs2ynZMKKsCUKVPQtWtXREREoG3btlixYkW2Kzxrg9DQUMycOfO92728vPDbb78VYqK8efbsGWxtbQEAJiYmMDY2hqWlpbTd0tJSa4bYAkBsbCxu3LgBAKhcubJWfIMtVqwYbt68iRIlSsDS0vKDXwY0eSRTjRo1EBMTA2tra9SoUUPqU/Eubelj8ejRoyydR99WpEgRaQFgTVOzZk0EBgbC0tISbm5uH3xPacNCoLdv30aHDh1w+fJltffVm+PShvfTG2PHjsXhw4exaNEi9OzZEwsXLsSDBw+wZMkStakBChuLnXzm5OQEJycnTJ48GZ07d4aRkZHckfJEm/8QvuvdP4TaOEouISEBPj4+2LBhg/SHT1dXF126dMHChQuleYQ00e+//y4tBKipnRdz4s6dO9JlaSXMe1SqVClcuXLlvcOdL126hJIlSxZyqpxp166dtGCppve3y4kRI0bA0dERgYGBcHR0xJkzZ/D06VOMGjVKK75Uvm3Hjh1YtWoVPD090adPH3z55ZeoUKECHBwcsHbtWvTo0UOWXCx2CsjkyZPljvBJtPkP4bt69+4t/WFMTk7G4MGDpQ6+2rC+F/B6rbWQkBDs3LlTbfTPiBEjMGjQIGzYsEHmhO8XGhqKb775Bvr6+nB0dET9+vW1ckTZ251EldBhtFWrVpg4cSJatGghjSJ949WrV5g8eTK+/vprmdJ9mKWlpbQkR58+fVC6dGmtXqIjODgYhw4dQokSJaCjowMdHR00bNgQ06dPx/DhwxESEiJ3xBx79uyZtJismZmZdLa2YcOGGDJkiGy52GcnHynp1Kqvry+OHDmCs2fPZvuHsE6dOmjcuPFH12yS28c69r6h6R18jY2NsW/fPjRs2FCt/dixY9LoMk1VtGhR3L9/HzY2NooZpbhy5UqUKFECrVu3BvD61P3SpUvh7OyM9evXa0Ux9OjRI9SsWRO6uroYNmwYKleuDOB1X52FCxciIyMDFy5cgI2NjcxJsypSpAgePnwIa2trRbynLC0tceHCBTg6OqJ8+fJYtmwZGjdujFu3bsHFxQVJSUlyR8wxV1dX/PHHH2jUqBGaNWuGGjVq4LfffsP8+fMxa9YsaeHcwqZ9X680mJJOrf7000/4559/UKlSpff+Ifzxxx9lTvlxml7E5FTx4sWzvVRlbm6u1gdJE5UtWxbz58+Hl5cXhBAIDg5+b2ZtGOYMAL/++isWLVoE4PW38gULFmDu3LnYuXMnRo4cqRVTAdjY2ODkyZMYMmQIJkyYoNZPpHnz5li4cKFGFjrA66HlW7duRatWrSCEwP3799878aY2zAJdrVo1hIaG
wtHREXXr1sWsWbOgp6eHpUuXSmdJtEWfPn0QGhqKRo0aYfz48WjTpg0WLFiAtLQ0zJkzR75ghb8cF2mLu3fvipYtWwodHR2hUqmESqUSOjo6omXLluL27dtyx/usLFmyRDRr1kxER0dLbdHR0cLLy0ssXrxYxmQft23bNmFjYyO9f968l969acOik28YGhqKe/fuCSGEGDt2rOjZs6cQQogrV66IEiVKyBktx27duiUyMzOFEEI8e/ZMnDlzRpw+fVo8e/ZM5mQft2TJEqGnpyd0dHTee9Om99TevXvF1q1bhRBChIeHi8qVKwuVSiVKlCghAgMDZU73ae7evSu2bt0qQkNDZc3By1gF5OzZs8jMzETdunXV2k+fPg1dXV3Url1bpmS59/z5c0REREAIgYoVK2r8mQSlePdSaHh4OFJSUqRvqpGRkdDX10fFihU1/rIoAGno/40bN957yUGTO1q/zdraGvv27YObmxvc3Nzg5+eHnj174tatW6hevToSExPljvhR717+6dKlC+bPn6+xZ3Pe9eLFC9y7dw+urq44ePCgNEP6u7Rl+Y53PXv27KOjFynneBmrgPj4+GDs2LFZip0HDx5g5syZOH36tEzJcs/S0hJffPGF3DE+O9p+KfRdJiYmOHz4MBwdHbWyg/LbvvrqK/Tv3x9ubm64efMmWrVqBQC4evWqRi1++CHvfs/dvXs3pk+fLlOa3DM1NUW1atWwYsUKNGjQQOpCoO2ioqIAvF49XFsFBgYiMDAQsbGxWdYnXL58uSyZtPsvjgYLCwtDzZo1s7S7ubkhLCxMhkSkbbR9RN8bCQkJ0qypbm5uH+xsKdfsqrm1cOFC/PTTT4iKisLWrVulswrnz59Ht27dZE73efH29pY7widLT0+Hv78/5s+fL50VNDExga+vLyZPnvzBaUA0jb+/P6ZOnYratWujZMmSGnNmisVOAdHX18ejR4+ydC6Ljo7W+m+1VPi8vb3Rr18/renA+zZLS0vpcomFhUW2f/yEli14aGFhgQULFmRp9/f3lyFN3qhUKq2dg0opE1W+4evri3/++QezZs1Sm1piypQpePr0qdQZXhssXrwYAQEB6Nmzp9xR1PBTt4B4eXlhwoQJ+Pfff6V+CHFxcfjhhx/w1VdfyZyOtE18fDyaNWsGBwcH9OnTB97e3ihVqpTcsXLk0KFDKFasmPSztnygfsjevXthYmIiTQWwcOFC/PXXX3B2dsbChQu1ol+bEOKDc1C9oYkjy96eqPL333/X+vfUunXrsGHDBrRs2VJqc3V1hb29Pbp166ZVxU5qairq168vd4ws2EG5gDx48AAeHh54+vQp3NzcAAAXL16EjY0NDhw4oNXXY0kejx8/xurVq7Fy5UqEhYWhWbNm6NevH9q1a6dVp7mVwMXFBTNnzkSrVq1w+fJlfPHFF/Dz88Phw4fh5OSkFVMeKGUOKiWwtrZGUFBQlnXKrl27Bg8PD62ZrR4Axo0bBxMTE0ycOFHuKGpY7BSgly9fYu3atQgNDYWhoSFcXV3RrVs3fjDRJ7tw4QJWrFiBZcuWwcTEBN999x2GDh2KihUryh3tg1asWAETExN07txZrX3z5s1ISkrSmv4XJiYmuHLlCsqWLYspU6bgypUr2LJlCy5cuIBWrVohJiZG7oifjd27d0NXVxfNmzdXa9+/fz8yMjLUzpZoqqlTp+L69etYsWKFdKYtJSUF/fr1Q8WKFTW+/56fn5/0c2ZmJlauXAlXV1e4urpm+byTa64dXsYqQMbGxhg4cKDcMUhhoqOjceDAARw4cAC6urrS2QVnZ2fMmjXrvSvVa4Lp06djyZIlWdqtra0xcOBArSl29PT0pI7WBw8eRK9evQC87kuSkJAgZ7TPzvjx47NdYDIzMxPjx4/XimInJCQEgYGBKF26tDRUPjQ0FKmpqWjatCk6duwo7auJlxXfXc6iRo0aAIArV67IkCZ7LHYK0OrVq7FkyRLcvn0bwcHB
cHBwwO+//45y5cqhXbt2cscjLZKWlob//vsPK1aswP79++Hq6orvv/8e3bt3l0Ywbdu2DX379tXoYicyMhKOjo5Z2h0cHBAZGSlDorxp2LAh/Pz80KBBA5w5cwYbN24EANy8eROlS5eWOd3nJTw8HM7OzlnanZycEBERIUOi3LOwsECnTp3U2rSpq8Phw4fljvBRLHYKyKJFizBp0iR8//33+Pnnn6VRJpaWlpg7dy6LHcqVkiVLIjMzE926dcOZM2ekb05va9y4MSwsLAo9W25YW1vj0qVLWeaiCQ0Nfe+kcJpowYIFGDp0KLZs2YJFixZJncX37NmDFi1ayJzu82Jubo7bt29neU9FRERk6WytqZTUL6pv376YN2+e1IH8jZcvX8LX11e2eXa4XEQBqVKliti2bZsQQggTExNx69YtIYQQly9fFsWLF5cxGWmjVatWiVevXskd45ONHTtWODg4iEOHDon09HSRnp4uAgMDhYODgxg1apTc8UgLDRw4ULi4uIiIiAipLTw8XLi6uop+/frJmCz3YmNjxbFjx8SxY8dEbGys3HHyREdHRzx69ChL++PHj4Wurq4MiV7jmZ0CcufOHWkU1tv09fU1eoVq0kyaNmdFXk2bNg13795F06ZNpfmmMjMz0atXL/z6668yp/uwtydH/Fi/HG2ZHFEJZs2ahRYtWsDJyUm6hHj//n18+eWX+O2332ROlzNvznqsWrVKmnFYV1cXvXr1wh9//AEjIyOZE35cQkIChBAQQuDFixcwMDCQtmVkZGD37t2yrkzPYqeAODo64uLFi3BwcFBr37t3b5bhhUQf8/LlS8yYMeO9U7Dfvn1bpmS5o6enh40bN2LatGnSKEUXF5cs/080kRInR1QCc3NznDx5EgcOHFAb+apNE3D6+fkhKCgIO3bsQIMGDQAAx48fx/DhwzFq1CitmGfnzf8JlUqFSpUqZdmuUqlknXSTxU4B8fPzg4+PD5KTkyGEwJkzZ7B+/XpMnz4dy5YtkzseaZn+/fsjKCgIPXv21Kgp2POqbNmyEEKgfPnyWjOj+NuTI2pDh8zPiUqlgpeXFzw8PKCvr691/z+2bt2KLVu2wNPTU2pr1aoVDA0N8e2332pFsXP48GEIIdCkSRNs3bpV+r8CvP6S4+DgADs7O/kCynYB7TOwZs0aUaFCBaFSqYRKpRKlSpUSy5YtkzsWaSFzc3Nx/PhxuWN8spcvX4q+ffsKXV1doaurK/VlGzZsmJg+fbrM6UgbZWRkiKlTpwo7Ozu199RPP/2kNX9vDQ0NRVhYWJb2K1euCCMjIxkS5d3du3fF0aNHRY8ePUS9evXE/fv3hRCv+x0eO3ZMtlw68pVZytejRw+Eh4cjMTERMTExuH//Pvr16yd3LNJClpaWat+UtNWECRMQGhqKI0eOqF3Tb9asmTR8W1skJyfjzJkz2LlzJ/777z+1GxWen3/+GQEBAZg1axb09PSk9mrVqmnNWXR3d3dMnjwZycnJUturV6/g7+8vrZWlLc6dO4fmzZvD0NAQISEhSElJAfB6yRtZ++XJVmZ9Jh49eiSOHj0qjh49qrW960l+q1evFt988414+fKl3FE+SZkyZURwcLAQQn2UYnh4uDA1NZUzWq7s2bNHWFlZSWdt377p6OjIHe+zUr58eXHw4EEhhPp76tq1a8LCwkLOaDl26dIlYWdnJ4oXLy6aNGkimjRpIooXLy5KlSolrly5Ine8XKlRo4ZYuXKlEEL93+PChQvCxsZGtlzacbFcC7148QJDhw7F+vXr1XrXd+nSBQsXLpQWByV6Hzc3N7W+BxEREbCxsUHZsmWzTMF+4cKFwo6XJ48fP852RMbLly+1qp+Fr68vOnfujEmTJsHGxkbuOJ+1Bw8eoEKFClnaMzMzkZaWJkOi3HNxcUF4eDjWrl2L69evAwC6deuGHj16wNDQUOZ0uXPjxo1sO4ebm5sjLi6u8AP9HxY7BaR///4ICQnBrl27
pNOQwcHBGDFiBAYNGoQNGzbInJA0Xfv27eWOkO9q166NXbt2wdfXFwCkAmfZsmVadbr+0aNH8PPzY6GjAZydnXHs2LEsI/q2bNmS7fQfmiYtLQ1OTk7YuXMnBgwYIHecT2Zra4uIiIgskzweP34c5cqVkycUWOwUmJ07d2Lfvn1o2LCh1Na8eXP89ddfnGGVckTTF//Li19//RUtW7ZEWFgY0tPTMW/ePISFheHkyZMICgqSO16OffPNNzhy5AjKly8vd5TP3qRJk+Dt7Y0HDx4gMzMT//zzD27cuIFVq1Zh586dcsf7qKJFi6r11dF2AwYMwIgRI7B8+XKoVCo8fPgQwcHBGD16tKwroXPV8wJSpkwZ7Nq1Cy4uLmrtly5dQqtWrXD//n2ZkpG2EP83Z4vS3Lp1CzNmzEBoaCgSExNRs2ZNjBs3Lsv/FU2WlJSEzp07w8rKCi4uLlkuKw4fPlymZJ+nY8eOYerUqWrvqUmTJsHLy0vuaDny66+/4ubNm1i2bJnWTMXwPkII/Prrr5g+fbq0WK6+vj5Gjx6NadOmyZaLxU4BWbp0KTZv3ozVq1fD1tYWABATEwNvb2907NgRgwYNkjkhaTpnZ2dMmjQJHTt2VBtl8q7w8HDMmTMHDg4OGD9+fCEm/Hz9/fffGDx4MAwMDFC8eHG1olSlUmnNJI+kGTp06IDAwECYmJjAxcUly5pemrjS+cekpqYiIiICiYmJcHZ2homJiax5WOwUEDc3N0RERCAlJQVlypQB8HrFZ319fVSsWFFtX23pXEqFKzAwEOPGjcPt27fx1VdfoXbt2rCzs4OBgQGeP3+OsLAwHD9+HFevXsWwYcPwww8/aEXH94yMDGzbtg3Xrl0D8Lqoa9eunVZ9o7W1tcXw4cMxfvx46OhwBg9NcO7cObX3VK1atWROlHN9+vT54HYlLRQqFxY7BSQ302IrsW8G5Z/jx49j48aNOHbsGO7du4dXr16hRIkScHNzQ/PmzdGjRw9YWlrKHTNHrl69irZt2yImJgaVK1cGANy8eRNWVlbYsWMHqlWrJnPCnClWrBjOnj3LPjsa4P79++jWrRtOnDgBCwsLAEBcXBzq16+PDRs2SOtl0eeNxQ4RFRp3d3dYWVlh5cqVUoH2/Plz9O7dG48fP8bJkydlTpgzI0eOhJWVFX744Qe5o3z2WrRogbi4OKxcuVIqoG/cuIE+ffrAzMwMe/fulTlhzsXGxuLGjRsAgMqVK8u6cKbSsNgpIFFRUVCpVNK3ijNnzmDdunVwdnbGwIEDZU5HJA9DQ0OcO3cOVatWVWu/cuUKvvjiC7x69UqmZLkzfPhwrFq1CtWrV4erq2uWDspz5syRKdnnx9DQECdPnswyzPz8+fP48ssvpU6ymiwhIQE+Pj7YsGGDtIgs52XLX7zYXEC6d+8uLRYYExODZs2a4cyZM/jxxx8xdepUmdMRyaNSpUp49OhRlvbY2NhsJ4bTVJcvX4abmxt0dHRw5coVhISESLeLFy/KHe+zYm9vn+3kgRkZGfIuPJkLAwYMwOnTp7Fz507ExcUhLi4OO3fuxLlz5ziYJb/IMW3z58DCwkJcv35dCCHEvHnzRP369YUQQuzbt084OjrKGY1INrt27RJVq1YVmzdvFlFRUSIqKkps3rxZuLi4iF27don4+HjpRpQT27dvF3Xq1BFnz56V2s6ePSvq1asntm3bJl+wXDAyMsp2kcyjR49q3UKgmoqXsQqIiYkJrly5grJly6Jt27Zo0KABxo0bh8jISFSuXFlrTtcT5ae3Ry69Ga795k/Q2/dVKpV0Ol+TRURE4NatW/Dw8IChoaFi50bSZJaWlkhKSkJ6ero0ou/Nz+8O4X727JkcET+K87IVPO0Z66llqlatisWLF6N169Y4cOCANJnSw4cPUbx4cZnTEcnjzaVdbff06VN8++23OHz4MFQqFcLDw1GuXDn069cPlpaWmD17ttwRPxtz586VO8In++mnn+Dn
55dlXrYxY8bIOuuwkvDMTgE5cuQIOnTogISEBHh7e2P58uUAgB9++AHXr1/XykmiSF6ZmZmIiIhAbGystLjsG9ktvEcFp1evXoiNjcWyZctQpUoVhIaGoly5cti3bx/8/Pxw9epVuSOSFuG8bAWPZ3YKiKenJ548eYKEhAS1OVAGDhwIIyMjGZORNjp16hS6d++Oe/fu4d3vJ9pyyQcA9u7dCxMTE2nNuIULF+Kvv/6Cs7MzFi5cqDXzBe3fvx/79u3LModLxYoVce/ePZlSfZ4uXLiAokWLSpeA/v33X6xYsQLOzs6YMmXKB2cf1xRKXPRX0/DMDpEWqFGjBipVqgR/f3+ULFkyS78QbRma6uLigpkzZ6JVq1a4fPkyateujVGjRuHw4cNwcnLSmpliTU1NceHCBVSsWBGmpqbSmZ1z586hefPmePr0qdwRPxtffPEFxo8fj06dOuH27dtwdnZGx44dcfbsWbRu3VoRl7no07HYIdICxsbGCA0N1arh2dl5u+P+lClTcOXKFWzZsgUXLlxAq1atEBMTI3fEHGnVqhVq1aqFadOmwdTUFJcuXYKDgwO6du2KzMxMbNmyRe6Inw1zc3NcuHAB5cuXx8yZM3Ho0CHs27cPJ06cQNeuXREVFSV3xGyxM3vh4jw7RFqgbt26iIiIkDvGJ9PT05MmeTt48KC0KnWxYsWQkJAgZ7RcmTVrFpYuXYqWLVsiNTUVY8eORbVq1XD06FHMnDlT7nifFSGE1Ift4MGDaNWqFYDX8+88efJEzmgfVLVqVWzYsAGpqakf3C88PBxDhgzBjBkzCimZMrHPDpEW8PX1xahRoxATEwMXF5csM/a6urrKlCx3GjZsCD8/PzRo0ABnzpzBxo0bAbxeH0ub1jCqVq0abt68iQULFsDU1BSJiYno2LEjfHx8ULJkSbnjfVZq166Nn3/+Gc2aNUNQUBAWLVoEALhz5w5sbGxkTvd+f/zxB8aNG4ehQ4fmaKHfIUOGyB1Zq/EyVgFZtWoVunTpAn19fbX21NRUbNiwAb169ZIpGWmj7FbWVqlUWjUnDfB6hMnQoUMRFRWF4cOHo1+/fgBerzWVkZGB+fPny5yQtM2lS5fQo0cPREZGws/PT1pY2dfXF0+fPsW6detkTvhhSlroV5Ox2Ckgurq6iI6OzrKQ29OnT2Ftba01H06kGT42wsfBwaGQkhBph+TkZOjq6mY5C0qfJ17GKiDv63x2//59rRk5Q5qDxQxR7hgYGMgdgTQIi5185ubmBpVKBZVKhaZNm0rTlwOvF6a7c+cOWrRoIWNC0la3bt3C3Llzce3aNQCAs7MzRowYgfLly8ucjIhIs7HYyWdvJoe6ePEimjdvDhMTE2mbnp4eypYti06dOsmUjrTVvn370LZtW9SoUQMNGjQAAJw4cQJVq1bFjh078NVXX8mckIhIc7HPTgFZuXIlunTpwlOplC/edFZ8d/jp+PHjsX//fk4hX8iWL1+Oxo0bw9HRUe4oRJQDnGengHh7e8PAwADnz5/HmjVrsGbNGoSEhMgdi7TUtWvXpJFLb+vbty/CwsJkSJQ3ffv2xYsXL7K0v3z5En379pUhUd5Mnz4dFSpUQJkyZdCzZ08sW7ZMEfMgaaOpU6dKcze97dWrV5g6daoMiUgT8cxOAYmNjUXXrl1x5MgRWFhYAADi4uLQuHFjbNiwAVZWVvIGJK1ib2+POXPmoHPnzmrtmzZtwujRoxEZGSlTstx53yjFJ0+ewNbWFunp6TIly70HDx7gyJEjOHr0KIKCghAeHo6SJUvC09MTa9askTveZ0MpI1+50G/BYp+dAuLr64sXL17g6tWrqFKlCgAgLCwM3t7eGD58ONavXy9zQtImAwYMwMCBA3H79m3Ur18fwOs+OzNnzoSfn5/M6T4uISEBQggIIfDixQu1y7sZGRnYvXt3lg8rTVeqVCn06NEDHTp0wLFjx7B+/XqsXbsWGzZsYLFTiN438jU0NBTF
ihWTIVHuKWWhX03GMzsFxNzcHAcPHsQXX3yh1n7mzBl4eXkhLi5OnmCklYQQmDt3LmbPno2HDx8CAOzs7DBmzBgMHz5c49fY0dHR+WBGlUoFf39//Pjjj4WYKu/279+PI0eO4MiRIwgJCUGVKlXQqFEjeHp6wsPDg5PAFQJLS0uoVCrEx8fDzMxM7f2VkZGBxMREDB48GAsXLpQxZc4oZaFfTcZip4CYmpri2LFjqFGjhlp7SEgIGjVqpFXrAJFmedPnxdTUVOYkORcUFAQhBJo0aYKtW7eqfePW09ODg4MD7OzsZEyYOzo6OrCyssKoUaMwcOBA6VI1FZ6VK1dCCIG+ffti7ty5agXBm5Gv7u7uMibMOaUs9KvJWOwUkHbt2iEuLg7r16+X/og/ePBAmvp727ZtMickKnz37t2Dvb19tstfaJO5c+fi6NGjOHr0KPT19aWzOp6enqhUqZLc8T4rQUFBqF+/vlbPlNykSROMHTuWc7AVIBY7BSQqKgpt27bF1atXYW9vL7VVq1YN//33n1YtekjyqFmzJgIDA2FpaSlNVvk+2jT0PC4uDn///bc0OWLVqlXRt29frT1Vf/nyZQQFBeHQoUPYuXMnrK2tcf/+fbljfVYyMjKwfft2tfdU27ZtoaurK3OynNm2bRt++uknjBkzRqsX+tVkLHYKkBACBw8exPXr1wEAVapUQbNmzWRORdrC398fY8aMgZGREfz9/T+475vFDzXduXPn0Lx5cxgaGqJOnToAgLNnz+LVq1fYv38/atasKXPCnBNCICQkBEeOHMHhw4dx/PhxvHjxAi4uLpxmohBFRESgVatWePDgASpXrgwAuHHjBuzt7bFr1y6tmGFcKQv9ajIWO0RUaL788ktUqFABf/31l7SUSnp6Ovr374/bt2/j6NGjMifMmTZt2uDEiRNISEhA9erV4enpiUaNGsHDw4P9dwpZq1atIITA2rVrpb5gT58+xXfffQcdHR3s2rVL5oQfx4V+Cx6LnQIUGBiIwMDAbOdNWL58uUypSBtFRUVBpVJJlz/PnDmDdevWwdnZGQMHDpQ5Xc4ZGhoiJCQETk5Oau1hYWGoXbt2tpPDaaIxY8agUaNG+PLLL7X28ptSGBsb49SpU3BxcVFrDw0NRYMGDZCYmChTMtIknGengPj7+2Pq1KmoXbt2tkMJiXKje/fuGDhwIHr27ImYmBg0a9YM1apVw9q1axETE4NJkybJHTFHzMzMEBkZmaXYiYqK0qrRZf/73//kjkD/R19fP9tZuRMTE6GnpydDorzhQr8FTFCBsLW1FatWrZI7BimEhYWFuH79uhBCiHnz5on69esLIYTYt2+fcHR0lDNarvj6+orSpUuLDRs2iMjISBEZGSnWr18vSpcuLUaMGCF3vFw5cuSI+Prrr0X58uVF+fLlRZs2bcTRo0fljvXZ6dmzp6hatao4deqUyMzMFJmZmSI4OFhUq1ZNeHt7yx0vR/bu3Sv09PREnTp1xMiRI8XIkSNFnTp1hL6+vti/f7/c8RSBxU4BKVasmIiIiJA7BimEsbGxuHPnjhBCiDZt2ogZM2YIIYS4d++eMDAwkDFZ7qSkpIjhw4cLPT09oaOjI3R0dIS+vr74/vvvRXJystzxcmz16tWiSJEi4ttvvxXz5s0T8+bNE99++60oWrSoWLt2rdzxPivPnz8Xbdu2FSqVSujp6Unvrfbt24u4uDi54+VIjRo1xLhx47K0jxs3Tri5ucmQSHnYZ6eAjBs3DiYmJpg4caLcUUgB6tati8aNG6N169bw8vLCqVOnUL16dZw6dQrffPON1g11TkpKwq1btwAA5cuXh5GRkcyJcqdKlSoYOHAgRo4cqdY+Z84c/PXXX9KlCCo84eHhaiNftWmCPgMDA1y+fBkVK1ZUa7958yZcXV2RnJwsUzLlYJ+dApKcnIylS5fi4MGDcHV1zTJvwpw5c2RKRtpo5syZ6NChA/73v//B29sb1atXBwD8
999/0hBubWJkZJSlQ6k2uX37Ntq0aZOlvW3btvjhhx9kSEQVK1bMUixoCysrK1y8eDFL/osXL2rdmnGaisVOAbl06ZK0VMSVK1fUtrGzMuWWp6cnnjx5goSEBLV1lwYOHKhVZ0VevnyJGTNmvHeU4u3bt2VKljv29vYIDAzMcvbg4MGD0iSiVDgyMjIQEBDw3vfUoUOHZEqWc9q+0K82YLFTQA4fPix3BFKQV69eQQghFTr37t3Dtm3bUKVKFTRv3lzmdDnXv39/BAUFoWfPnlo9SnHUqFEYPnw4Ll68qPbhFBAQgHnz5smc7vMyYsQIBAQEoHXr1qhWrZpWvqcmTpwIU1NTzJ49GxMmTADweqHfKVOmYPjw4TKnUwb22SHSAl5eXujYsSMGDx6MuLg4ODk5oWjRonjy5AnmzJmDIUOGyB0xRywsLLBr1y40aNBA7iifbNu2bZg9e7bUP6dKlSoYM2YM2rVrJ3Oyz0uJEiWwatUqtGrVSu4o+UIbF/rVBtq9Gh/RZ+LChQv48ssvAQBbtmyBjY0N7t27h1WrVmH+/Pkyp8s5S0tLtRXPtVmHDh1w/PhxPH36FE+fPsXx48dZ6MhAT09Pqzojf4ypqSkLnQLAMztEWsDIyAjXr19HmTJl8O2336Jq1aqYPHkyoqKiULlyZa2ZeXjNmjX4999/sXLlSq3qa0Saa/bs2bh9+zYWLFigVZewlLrQr6Zinx0iLVChQgVs374dHTp0wL59+6Qhz7GxsTAzM5M5Xc7Nnj0bt27dgo2NDcqWLZtllKIm/1G3tLTM8Yfps2fPCjgNvXH8+HEcPnwYe/bsQdWqVbO8p/755x+Zkn1Yu3btoK+vDwBo3769vGE+Ayx2iLTApEmT0L17d4wcORJNmzaFu7s7AGD//v1wc3OTOV3OafMf9blz58odgbJhYWGBDh06yB0j1yZPnpztz1QweBmLSEvExMQgOjoa1atXh47O6+52Z86cgZmZWZa1pohIeyhloV9NxmKHiAqUEEKr+lIQFbYvv/xSbaHfSpUqoVq1aggPD4evr6/WLPSryVjsEGmojh07IiAgAGZmZujYseMH99XUfgnA69WbJ02ahI4dO35wFerw8HDMmTMHDg4OGD9+fCEmJG3TokULTJkyBfXq1fvgfi9evMCff/4JExMT+Pj4FFK63LO0tMSpU6dQuXJlzJ8/Hxs3bsSJEyewf/9+DB48WGsm29Rk7LNDpKHMzc2lMyLm5uYyp8m7P/74A+PGjcPQoUPx1VdfoXbt2rCzs4OBgQGeP3+OsLAwHD9+HFevXsWwYcO0Zs4gkk/nzp3RqVMnmJubo02bNu99T+3evRutW7fG//73P7kjf1BaWprUWfngwYNo27YtAMDJyQnR0dFyRlMMntkhokJx/PhxbNy4EceOHcO9e/fw6tUrlChRAm5ubmjevDl69OihthQG0YekpKRg8+bN2LhxI44fP474+HgAr5fjcXZ2RvPmzdGvXz9UqVJF5qQfp7SFfjURix0iItJ68fHxePXqFYoXL55l+LmmO3LkCDp06ICEhAR4e3tj+fLlAIAffvgB169f1+jL1NqCxQ6RFnj69CkmTZqEw4cPZ7vYIed1KXgf6zf1Nn44UW5lZGRkWej37t27MDIy4srn+YB9doi0QM+ePREREYF+/frBxsaGo5tk8Ha/KSEEtm3bBnNzc9SuXRsAcP78ecTFxeWqKCIClLPQrybjmR0iLWBqaorjx4+jevXqckchAOPGjcOzZ8+wePFi6OrqAnj9zXzo0KEwMzPT+A6xpFmUstCvJuNCoERawMnJCa9evZI7Bv2f5cuXY/To0VKhAwC6urrw8/OT+lsQ5ZRSFvrVZCx2iLTAn3/+iR9//BFBQUF4+vQpEhIS1G5UuNLT03H9+vUs7devX8/Sn4roY5KSkqSVzvfv34+OHTtCR0cH9erVw71792ROpwzss0OkBSwsLJCQkIAmTZqotb+ZnTgjI0OmZLlz
4cIFFC1aFC4uLgCAf//9FytWrICzszOmTJnywUkHNUmfPn3Qr18/3Lp1C3Xq1AEAnD59GjNmzECfPn1kTvd5OXv2LDIzM1G3bl219tOnT0NXV1fqU6XJlLLQryZjsUOkBXr06IGiRYti3bp1Wt1BedCgQRg/fjxcXFxw+/ZtdO3aFR06dMDmzZuRlJSkNYtt/vbbb7C1tcXs2bOlSd9KliyJMWPGYNSoUTKn+7z4+Phg7NixWYqdBw8eYObMmTh9+rRMyXJOKQv9ajJ2UCbSAkZGRggJCUHlypXljvJJzM3NceHCBZQvXx4zZ87EoUOHsG/fPpw4cQJdu3ZFVFSU3BFz7c1lRH4Dl4eJiQkuXbqEcuXKqbXfuXMHrq6uePHihUzJcocL/RYsntkh0gK1a9dGVFSU1hc7QgipT8vBgwfx9ddfAwDs7e3x5MkTOaPlGYsceenr6+PRo0dZip3o6GgUKaI9H3G2trawtbVVa3tziZQ+Hc/sEGmBzZs3Y8qUKRgzZgxcXFyyzBDr6uoqU7LcadKkCezt7dGsWTP069cPYWFhqFChAoKCguDt7Y27d+/KHTFHHj16hNGjRyMwMBCxsbF498+otvShUoJu3bohOjoa//77rzQXUlxcHNq3bw9ra2ts2rRJ5oTZU8pCv9pCe8peos9Yly5dAAB9+/aV2lQqldZ1UJ47dy569OiB7du348cff0SFChUAvB5uW79+fZnT5Vzv3r0RGRmJiRMnomTJklrbh0oJfvvtN3h4eMDBwUHq33Lx4kXY2Nhg9erVMqd7P6Us9KsteGaHSAt8bPipg4NDISUpGMnJydDV1dWaNY1MTU1x7Ngx1KhRQ+4oBODly5dYu3YtQkNDYWhoCFdXV3Tr1k1r3k9U8Hhmh0gLaHsx8zEGBgZyR8gVe3v7LJeuSD7GxsYYOHCg3DFIg/HMDpGWWL16NRYvXow7d+4gODgYDg4OmDt3LhwdHdGuXTu54+WIjo7OBy/5aMvluP3792P27NlYsmQJypYtK3ecz85///2Hli1bomjRovjvv/8+uG/btm0LKVXecaHfgsczO0RaYNGiRZg0aRK+//57/PLLL1JRYGFhgblz52pNsbNt2za1+2lpaQgJCcHKlSvh7+8vU6rc69KlC5KSklC+fHkYGRlluVzCD6eC1b59e8TExMDa2hrt27d/737a0p+NC/0WPJ7ZIdICzs7O+PXXX9G+fXuYmpoiNDQU5cqVw5UrV+Dp6am1w7bfWLduHTZu3Ih///1X7ig5snLlyg9u9/b2LqQkpARc6Lfg8cwOkRa4c+dOtjOp6uvr4+XLlzIkyl/16tXTqj4XLGY0Q1paGlq0aIHFixejYsWKcsfJMy70W/C4ECiRFnB0dMTFixeztO/duxdVqlQp/ED56NWrV5g/fz5KlSold5Q8SU5O5sKsMilatCguXbokd4xPxoV+Cx7P7BBpAT8/P/j4+CA5ORlCCJw5cwbr16/H9OnTsWzZMrnj5ZilpaVafwQhBF68eAEjIyOsWbNGxmS58/LlS4wbNw6bNm3C06dPs2zXhn4iSvHdd9/h77//xowZM+SOkmdKWehXk7HYIdIC/fv3h6GhIX766SckJSWhe/fusLOzw7x589C1a1e54+XYuwt96ujowMrKCnXr1oWlpaU8ofJg7NixOHz4MBYtWoSePXti4cKFePDgAZYsWaLVH7raKD09HcuXL8fBgwdRq1YtGBsbq22fM2eOTMlyTikL/WoydlAm0jJJSUlITEyEtbW13FE+W2XKlMGqVavg6ekJMzMzXLhwARUqVMDq1auxfv167N69W+6In43GjRt/cPvhw4cLKUneKWWhX03GMztEWsbIyAhGRkZyx8iz58+f4++//8a1a9cAvB5p1qdPHxQrVkzmZDn37NkzaeFJMzMzaah5w4YNMWTIEDmjfXa0oZj5GKUs9KvJ2EGZiArN0aNHUbZsWcyfPx/Pnz/H8+fPMX/+fDg6OuLo
0aNyx8uxcuXK4c6dOwBej6R5s9jkjh07YGFhIWOyz0/fvn3x4sWLLO0vX75UW0tOk/n6+mLEiBEICAjA+fPncenSJbUbfTpexiKiQuPi4gJ3d3csWrQIurq6AF535h06dChOnjyJy5cvy5wwZ37//Xfo6upi+PDhOHjwINq0aQMhBNLS0jBnzhyMGDFC7oifDV1dXURHR2e5rPvkyRPY2toiPT1dpmQ5p6OT9byDNi70q8lY7BBRoTE0NMTFixeznK6/ceMGatSoobVzjdy7dw/nz59HhQoV4OrqKnecz0JCQgKEELC0tER4eDisrKykbRkZGdixYwfGjx+Phw8fypgyZ5S+0K8mYJ8dIi2QnJysdYtlZqdmzZq4du1almLn2rVrWj17rIODAz+QCpmFhQVUKhVUKhUqVaqUZbtKpdKaJUj43il4LHaItICFhQXq1KmDRo0awdPTE/Xr14ehoaHcsXLk7T4Hw4cPx4gRIxAREYF69eoBAE6dOoWFCxdyyDblyuHDhyGEQJMmTbB161a1Du56enpwcHCAnZ2djAlzRwkL/WoyXsYi0gLHjx/H0aNHceTIEZw8eRLp6emoXbu2VPx89dVXckd8rzcrnX/sTw37JlBe3Lt3D2XKlNHquWneXej3ypUrKFeuHAICArBy5UpFjDiTG4sdIi2Tnp6Os2fPYsmSJVi7di0yMzM1ukj4WH+Et/F0PuXWihUrYGJigs6dO6u1b968GUlJSVqxjpnSF/rVBLyMRaQlbt68iSNHjki3lJQUfP311/D09JQ72gexgKGCNH36dCxZsiRLu7W1NQYOHKgVxY7SF/rVBCx2iLRAqVKl8OrVK3h6esLT0xPjxo2Dq6urVp+613YZGRnYvn27NDli1apV0bZtW2lIPRWOyMhIODo6Zml3cHBAZGSkDIly781Cv+9+MVDCQr+agsUOkRawsrLC9evXERMTg5iYGDx69AivXr3S6pmUtVlERARat26N+/fvSyPLpk+fDnt7e+zatQvly5eXOeHnw9raGpcuXULZsmXV2kNDQ1G8eHF5QuWSUhb61WTss0OkJeLi4nD06FEEBQUhKCgIYWFhqFGjBho3boxffvlF7niflVatWkEIgbVr10qjgJ4+fYrvvvsOOjo62LVrl8wJPx/jxo3Dxo0bsWLFCnh4eAAAgoKC0LdvX3zzzTf47bffZE6YM2vXrsWUKVNw69YtAICdnR38/f3Rr18/mZMpA4sdIi3z9OlTHDlyBP/++y/Wr1+v8R2UlcjY2BinTp2Ci4uLWntoaCgaNGiAxMREmZJ9flJTU9GzZ09s3rwZRYq8vliRmZmJXr16YfHixdDT05M5Ye5wod+CwctYRFrgn3/+kTomh4WFoVixYmjYsCFmz56NRo0ayR0v11JTUxEbG4vMzEy19jJlysiUKHf09fWzXY8pMTFR6z5ctZ2enh42btyIadOmITQ0FIaGhnBxcdHajvHavtCvpuKZHSItYG1tDQ8PD3h6eqJRo0ZZzihoi/DwcPTt2xcnT55Ua9e2NYB69eqFCxcu4O+//0adOnUAAKdPn8aAAQNQq1YtBAQEyBvwM5Samoo7d+6gfPny0hkeojdY7BBRoWnQoAGKFCmC8ePHo2TJkllGk2nLkhFxcXHw9vbGjh07ULRoUQCv5z9q27YtAgICYG5uLnPCz0dSUhJ8fX2xcuVKAK+naChXrhx8fX1RqlQpjB8/XuaEpAlY7BBpiXeHOjs7O6Ndu3ZaNdTZ2NgY58+fh5OTk9xR8kwIgaioKFhZWeHBgwfSv0eVKlVQoUIFmdN9fkaMGIETJ05g7ty5aNGiBS5duoRy5crh33//xZQpUxASEiJ3RNIAPNdHpAUiIiLQqlUrPHjwQKuHOjs7O2v9bLBCCFSoUAFXr15FxYoVWeDIbPv27di4cSPq1aundqawatWq0sgmTaeUhX41mY7cAYjo44YPH47y5csjKioKFy5cwIULF6TJ1IYPHy53
vBybOXMmxo4diyNHjuDp06dISEhQu2kDHR0dVKxYEU+fPpU7CgF4/PhxtiOXXr58qTWTblpYWMDDwwMTJ05EYGAgXr16JXckxeFlLCItoJShzjo6r79fvfshpG0dlHfs2IFZs2Zh0aJFqFatmtxxPmseHh7o3LkzfH19YWpqikuXLsHR0RG+vr4IDw/H3r175Y74Udq80K+2YLFDpAWKFSuGnTt3on79+mrtJ06cQJs2bfDs2TOZkuVOUFDQB7dryzB6S0tLJCUlIT09HXp6ejA0NFTbri3/Hkpw/PhxtGzZEt999x0CAgIwaNAghIWF4eTJkwgKCkKtWrXkjpgr2rbQr7Zgnx0iLfD1119j4MCBWYY6Dx48GG3btpU5Xc5pSzHzMXPnzpU7Av2fhg0b4uLFi5gxYwZcXFywf/9+1KxZE8HBwVo1RYO2LvSrLXhmh0gLKGmoc1xcHP7++2+1BTT79u2rVcdAlJ/eXei3UaNGXOg3n7HYIdIi4eHhuH79OgDtHOp87tw5NG/eHIaGhtIZqrNnz+LVq1fSN3JtcevWLaxYsQK3bt3CvHnzYG1tjT179qBMmTKoWrWq3PEULTed2c3MzAowSf6oUaMGrl+/jpo1a0oFT8OGDTmTcj5isUNEhebLL79EhQoV8Ndff0mz3Kanp6N///64ffs2jh49KnPCnAkKCkLLli3RoEEDHD16FNeuXUO5cuUwY8YMnDt3Dlu2bJE7oqLp6Oh89KyHtnV650K/BYvFDpGG8vPzy/G+c+bMKcAk+cfQ0BAhISFZJhUMCwtD7dq1kZSUJFOy3HF3d0fnzp3h5+cHU1NThIaGoly5cjhz5gw6duyI+/fvyx1R0T7W0f1t2tZPjAv9Fgx2UCbSUDmd+VWbruubmZkhMjIyS7ETFRUFU1NTmVLl3uXLl7Fu3bos7dbW1lo/aaI20LYC5mOUttCvJmKxQ6ShDh8+LHeEfNelSxf069cPv/32mzSM/sSJExgzZgy6desmc7qcs7CwQHR0NBwdHdXaQ0JCUKpUKZlSfb6OHTuGJUuW4Pbt29i8eTNKlSqF1atXw9HREQ0bNpQ73kcNHjwYHh4eGDhwoFYv9KvJWOwQUaH57bffoFKp0KtXL6SnpwMAihYtiiFDhmDGjBkyp8u5rl27Yty4cdi8eTNUKhUyMzNx4sQJjB49Gr169ZI73mdl69at6NmzJ3r06IELFy4gJSUFABAfH49ff/0Vu3fvljnhx8XGxsodQfHYZ4eICl1SUpK0blH58uW1btRJamoqfHx8EBAQgIyMDBQpUgQZGRno3r07AgICtGpxVm3n5uaGkSNHolevXmr9p0JCQtCyZUvExMTIHTFHlLDQryZjsUNElEdRUVG4fPkyEhMT4ebmhooVK8od6bNjZGSEsLAwlC1bVq3YuX37NpydnZGcnCx3xI/KbqHfGzduaN1Cv5qMl7GIqEB17NgRAQEBMDMzQ8eOHT+47z///FNIqT7N0aNH4eTkBHt7e9jb20vtaWlpCA4OhoeHh4zpPi+2traIiIhA2bJl1dqPHz+OcuXKyRMql94s9Hvq1CkUK1YMwOtRWd999x2GDx+OXbt2yZxQ+7HYIaICZW5uLo0YU8osyZ6enrCxscG2bdtQr149qf3Zs2do3LgxhwoXogEDBmDEiBFYvnw5VCoVHj58iODgYIwePRoTJ06UO16OBAUFqRU6AFC8eHHMmDEDDRo0kDGZcrDYIaICtWLFimx/1nZdu3ZF06ZNsXDhQvTu3VtqZ8+AwjV+/HhkZmaiadOmSEpKgoeHB/T19TF69Gj4+vrKHS9H9PX18eLFiyztiYmJ0NPTkyGR8rDPDhEVmlevXkEIIXVIvnfvHrZt2wZnZ2d4eXnJnC7ndHV1ER0djePHj6NXr14YOHAgZs+ejdjYWNjZ2fHMjgxSU1MRERGBxMREODs7w8TERO5IOdarVy9cuHAhy0K/AwYMQK1atRAQECBvQAVgsUNEhcbL
ywsdO3bE4MGDERcXh8qVK0NPTw9PnjzBnDlzMGTIELkj5oiOjg5iYmJgbW2NkJAQtGvXDs7Ozpg3bx6cnZ1Z7FCuKGmhX03FYoeICk2JEiUQFBSEqlWrYtmyZfjjjz8QEhKCrVu3YtKkSdKwW033drEDADExMWjfvj3u37+P6OhoFjuUJ9q+0K8mY58dIio0SUlJ0rIQ+/fvR8eOHaGjo4N69erh3r17MqfLOW9vbxgaGkr3bW1tERQUhIEDB2rNYqakeSpWrMjpCwoIz+wQUaFxdXVF//790aFDB1SrVg179+6Fu7s7zp8/j9atW2vNBHBEn0qJC/1qMp7ZIaJCM2nSJHTv3h0jR45E06ZN4e7uDuD1WR43NzeZ031YZGQkypQpk+P9Hzx4wHWy6L2UuNCvJuOZHSIqVDExMYiOjkb16tWho6MDADhz5gzMzMyyrIauSWxsbNC+fXv0798fX3zxRbb7xMfHY9OmTZg3bx4GDhyI4cOHF3JKIsoOix0ikk1CQgIOHTqEypUro0qVKnLH+aCnT5/il19+wfLly2FgYIBatWrBzs4OBgYGeP78OcLCwnD16lXUrFkTEydORKtWreSOTET/h8UOERWab7/9Fh4eHhg2bBhevXqF6tWr4+7duxBCYMOGDejUqZPcET/q1atX2LVrF44fP4579+7h1atXKFGiBNzc3NC8eXNUq1ZN7ohE9A4WO0RUaGxtbbFv3z5Ur14d69atw+TJkxEaGoqVK1di6dKlOe7HQESUGzpyByCiz0d8fLy0/s/evXvRqVMnGBkZoXXr1ggPD5c5HREpFYsdIio09vb2CA4OxsuXL7F3715piYjnz5/DwMBA5nREpFQcek5Eheb7779Hjx49YGJiAgcHB3h6egIAjh49ChcXF3nDEZFisc8OERWqc+fOISoqCl999ZW0WOOuXbtgYWGBBg0ayJyOiJSIxQ4RUT4SQnAiOCINw8tYRFSg/Pz8MG3aNBgbG390inxtmRa/d+/eWLhwIYyNjdXa7969i549e+LYsWMyJSOi7LDYIaICFRISgrS0NOnn99GmsyGhoaFwdXXFmjVrpCUvVq5cieHDh6NJkyYypyOid/EyFhFRLqWlpeGHH37A/PnzMWrUKERERGDPnj2YM2cOBgwYIHc8InoHix0iojyaPHkypk2bhiJFiiAoKEg6y0NEmoXFDhEVmuTkZPzxxx84fPgwYmNjkZn5/9q795iq6/iP46+DIXIRL3lLl+doUh4IyiSHYqKWpVtrZDpnTXRS6hyCt6a14UKnNStvqauWoZkXMqfV1GKRxxQ1ZyrGNJDEuRVoFl0UFZHP749+np9nUJE/+H7ly/OxsXE+38t5jQ325vN5n8+3JuD4kSNHbEr231y7dk1z587VqlWrNGvWLO3bt0/FxcVas2YNz8QCbkP07ACwTGpqqnJzczVq1Cj169evSfXp3Cw+Pl6VlZXy+XxKSEiQMUaLFy/WyJEjNXHiRK1evdruiABuwswOAMu0adNGO3fubPL76aSmpmrFihW1Po119OhRjRs3ToWFhTYlA1AXih0AlomOjtbmzZsVFxdnd5RGc/XqVYWEhNgdA8BNKHYAWGbXrl1asWKF3n77bbndbrvj/Cd//PGHIiMj/d//kxvnAbg90LMDwDLx8fG6cuWKevbsqbCwMAUHBwcc//XXX21K9u/atWunsrIyderUSW3btq2z3+jG7snXr1+3ISGAv0OxA8AyY8eO1Y8//qhFixapc+fOTapB+auvvlL79u0lSbt377Y5DYD/gmUsAJYJCwvTgQMH9MADD9gdBUAzwswOAMv07t1bly9ftjtGg7hy5YqOHz9e535BTz31lE2pANSFmR0AlsnNzVVWVpYWLlyo2NjYWj07TaWx9/PPP1dKSoouXLhQ6xg9O8Dth2IHgGWCgoIk1X7oZ1Nr7I2KitLjjz+uefPmqXPnznbHAfAvWMYCYBmnNPaeO3dOM2fOpNABmgiK
HQCWSUpKsjtCgxg1apR8Pp/uueceu6MAqAeWsQBYau/evXrnnXd0+vRpbdmyRd26ddP69evVo0cPDRw40O549VJZWanRo0erY8eOdfYepaen25QMQF2Y2QFgma1bt2rcuHF67rnndOTIEV29elWS9Pvvv2vRokXauXOnzQnrZ9OmTcrNzVWrVq3k8/kCepBcLhfFDnCbYWYHgGX69OmjGTNmKCUlRa1bt1ZBQYF69uypo0ePasSIESovL7c7Yr106dJF6enpmjt3rr/pGsDti99SAJYpKirSoEGDao23adNGv/32m/WBblFVVZXGjBlDoQM0EfymArBMly5dVFJSUmt837596tmzpw2Jbs348eOVk5NjdwwA9UTPDgDLvPDCC8rIyND7778vl8uln376SQcOHNDs2bOVmZlpd7x6u379uhYvXqwvvvhCcXFxtRqUlyxZYlMyAHWh2AFgmblz56qmpkaPPvqoKisrNWjQIIWEhGj27NmaNm2a3fHq7bvvvlOfPn0kSYWFhQHHmtLDTYHmggZlAJarqqpSSUmJLl68qOjoaEVERNgdCYCD0bMDwHItW7ZUQUGBYmJiKHQANDpmdgDYIjIyUseOHWtSjckAmiZmdgDYgv+zAFiFYgcAADgaxQ4AW+zatUtdu3a1OwaAZoCeHQC2uPGnh49qA2hszOwAsNQHH3yg2NhYhYaGKjQ0VHFxcVq/fr3dsQA4GJsKArDMkiVLlJmZqbS0NCUmJkr661ERU6ZM0YULFzRjxgybEwJwIpaxAFimR48eysrKUkpKSsD4unXr9Morr6i0tNSmZACcjGUsAJYpKyvTgAEDao0PGDBAZWVlNiQC0BxQ7ACwTK9evfTRRx/VGs/JyVFUVJQNiQA0B/TsALBMVlaWxowZo6+//trfs5Ofn6+8vLw6iyAAaAj07ACw1JEjR7RkyRKdPHlSkuT1ejVr1iz/U8QBoKFR7ACwxLVr1zR58mRlZmaqR48edscB0IzQswPAEsHBwdq6davdMQA0QxQ7ACyTnJys7du32x0DQDNDgzIAy0RFRWn+/PnKz89X3759FR4eHnA8PT3dpmQAnIyeHQCW+adeHZfLpdOnT1uYBkBzQbEDAAAcjZ4dAJarqqpSUVGRqqur7Y4CoBmg2AFgmcrKSqWmpiosLEwxMTE6e/asJGnatGl67bXXbE4HwKkodgBY5qWXXlJBQYF8Pp9atWrlH3/ssceUk5NjYzIATsansQBYZvv27crJyVFCQoJcLpd/PCYmRj/88IONyQA4GTM7ACzz888/q1OnTrXGL126FFD8AEBDotgBYJn4+Hjt2LHD//pGgfPee++pf//+dsUC4HAsYwGwzKJFizRixAidOHFC1dXVWr58uU6cOKH9+/drz549dscD4FDM7ACwzMCBA3Xs2DFVV1crNjZWubm56tSpkw4cOKC+ffvaHQ+AQ7GpIAAAcDRmdgBYZujQocrKyqo1XlFRoaFDh9qQCEBzwMwOAMsEBQXpzjvvVGJiojZs2OB/EOi5c+fUtWtXXb9+3eaEAJyImR0Alvryyy9VXl6uhIQEnTlzxu44AJoBih0Alrrrrru0Z88excbG6uGHH5bP57M7EgCHo9gBYJkb++qEhIRo48aNysjI0PDhw7V69WqbkwFwMnp2AFgmKChI5eXlAbsob926VePHj9fly5fp2QHQKNhUEIBlSktL1bFjx4CxZ555Rr1799bhw4dtSgXA6ZjZAQAAjkbPDgAAcDSKHQAA4GgUOwAAwNEodgAAgKNR7ABodlwul7Zv3253DAAWodgBcNuoqqqyOwIAB6LYAdBoBg8erLS0NKWlpalNmzbq0KGDMjMzdWPHC4/HowULFiglJUWRkZGaNGmSpL82GoyJiVFISIg8Ho/efPPNgPveuG7s2LEKDw9Xt27dtGrVqnpl8ng8kqSnn35aLpdLHo9HZ86cUVBQUK29fpYtWya3262amhr5fD65XC7t2LFDcXFx
atWqlRISElRYWBhwzb59+/TII48oNDRUd999t9LT03Xp0qVb+fEBaCgGABpJUlKSiYiIMBkZGeb77783H374oQkLCzPvvvuuMcYYt9ttIiMjzRtvvGFKSkpMSUmJOXz4sAkKCjLz5883RUVFJjs724SGhprs7Gz/fd1ut2ndurV59dVXTVFRkVmxYoVp0aKFyc3N/ddM58+fN5JMdna2KSsrM+fPnzfGGDNs2DAzderUgHPj4uLMvHnzjDHG7N6920gyXq/X5ObmmuPHj5snn3zSeDweU1VVZYwxpqSkxISHh5ulS5ea4uJik5+fb/r06WMmTJjQED9OALeIYgdAo0lKSjJer9fU1NT4x+bMmWO8Xq8x5q+iJTk5OeCaZ5991gwbNixg7MUXXzTR0dH+12632wwfPjzgnDFjxpgRI0bUK5cks23btoCxnJwc065dO3PlyhVjjDHffvutcblcprS01Bjzf8XO5s2b/df88ssvJjQ01OTk5BhjjElNTTWTJk0KuO/evXtNUFCQuXz5cr2yAWh4LGMBaFQJCQn+B4BKUv/+/XXq1Cn/c7Di4+MDzj958qQSExMDxhITEwOuuXGfm/Xv318nT5685ZzJyclq0aKFtm3bJklau3athgwZ4l/2qut927dvr/vuu8//vgUFBVq7dq0iIiL8X0888YRqampUWlp6y9kA/P/wbCwAtgoPD7c7giSpZcuWSklJUXZ2tkaOHKmNGzdq+fLl/+keFy9e1OTJk5Wenl7rWPfu3RsqKoD/iGIHQKP65ptvAl4fPHhQUVFRatGiRZ3ne71e5efnB4zl5+fr3nvvDbjm4MGDte7r9XrrlSk4OLjOJ6w///zzuv/++7V69WpVV1dr5MiRtc45ePCgv3CpqKhQcXGx/30feughnThxQr169apXDgDWYBkLQKM6e/asZs6cqaKiIm3atElvvfWWMjIy/vb8WbNmKS8vTwsWLFBxcbHWrVunlStXavbs2QHn5efna/HixSouLtaqVau0ZcuWf7zvzTwej/Ly8lReXq6Kigr/uNfrVUJCgubMmaOxY8cqNDS01rXz589XXl6eCgsLNWHCBHXo0EHJycmSpDlz5mj//v1KS0vTsWPHdOrUKX3yySdKS0urVy4AjcTupiEAzpWUlGSmTp1qpkyZYiIjI027du3Myy+/7G9YdrvdZunSpbWu+/jjj010dLQJDg423bt3N6+//nrAcbfbbbKysszo0aNNWFiY6dKli1m+fHm9c3366aemV69e5o477jButzvg2Jo1a4wkc+jQoYDxGw3Kn332mYmJiTEtW7Y0/fr1MwUFBQHnHTp0yAwbNsxERESY8PBwExcXZxYuXFjvbAAansuY/93wAgAa2ODBg/Xggw9q2bJlDXpfj8ej6dOna/r06Q16X0lasGCBtmzZouPHjweM+3w+DRkyRBUVFWrbtm2Dvy+AxsMyFgDor+biwsJCrVy5UtOmTbM7DoAGRLEDwFE2bNgQ8NHvm79iYmL+9rq0tDT17dtXgwcP1sSJEy1MDKCxsYwFwFH+/PNPnTt3rs5jwcHBcrvdFicCYDeKHQAA4GgsYwEAAEej2AEAAI5GsQMAAByNYgcAADgaxQ4AAHA0ih0AAOBoFDsAAMDRKHYAAICj/Q/YEeiLMA57zwAAAABJRU5ErkJggg==",
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
bus_count
\n",
+ "
total_cost
\n",
+ "
cost_per_bus
\n",
+ "
\n",
+ "
\n",
+ "
prop_type
\n",
+ "
bus_size_type
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
BEB
\n",
+ "
articulated
\n",
+ "
12.0
\n",
+ "
18759576
\n",
+ "
1563298
\n",
+ "
\n",
+ "
\n",
+ "
standard/conventional (30ft-45ft)
\n",
+ "
151.0
\n",
+ "
148472913
\n",
+ "
983264
\n",
+ "
\n",
+ "
\n",
+ "
FCEB
\n",
+ "
not specified
\n",
+ "
29.0
\n",
+ "
38070971
\n",
+ "
1312792
\n",
+ "
\n",
+ "
\n",
+ "
standard/conventional (30ft-45ft)
\n",
+ "
73.0
\n",
+ "
82880364
\n",
+ "
1135347
\n",
+ "
\n",
+ "
\n",
+ "
electric (not specified)
\n",
+ "
articulated
\n",
+ "
29.0
\n",
+ "
39478000
\n",
+ "
1361310
\n",
+ "
\n",
+ "
\n",
+ "
not specified
\n",
+ "
15.0
\n",
+ "
17200000
\n",
+ "
1146666
\n",
+ "
\n",
+ "
\n",
+ "
zero-emission bus (not specified)
\n",
+ "
not specified
\n",
+ "
143.0
\n",
+ "
128156513
\n",
+ "
896199
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
"text/plain": [
- "
"
+ " bus_count \\\n",
+ "prop_type bus_size_type \n",
+ "BEB articulated 12.0 \n",
+ " standard/conventional (30ft-45ft) 151.0 \n",
+ "FCEB not specified 29.0 \n",
+ " standard/conventional (30ft-45ft) 73.0 \n",
+ "electric (not specified) articulated 29.0 \n",
+ " not specified 15.0 \n",
+ "zero-emission bus (not specified) not specified 143.0 \n",
+ "\n",
+ " total_cost \\\n",
+ "prop_type bus_size_type \n",
+ "BEB articulated 18759576 \n",
+ " standard/conventional (30ft-45ft) 148472913 \n",
+ "FCEB not specified 38070971 \n",
+ " standard/conventional (30ft-45ft) 82880364 \n",
+ "electric (not specified) articulated 39478000 \n",
+ " not specified 17200000 \n",
+ "zero-emission bus (not specified) not specified 128156513 \n",
+ "\n",
+ " cost_per_bus \n",
+ "prop_type bus_size_type \n",
+ "BEB articulated 1563298 \n",
+ " standard/conventional (30ft-45ft) 983264 \n",
+ "FCEB not specified 1312792 \n",
+ " standard/conventional (30ft-45ft) 1135347 \n",
+ "electric (not specified) articulated 1361310 \n",
+ " not specified 1146666 \n",
+ "zero-emission bus (not specified) not specified 896199 "
]
},
"metadata": {},
@@ -337,39 +2094,35 @@
}
],
"source": [
- "# bus count BY PROP TYPE\n",
- "display(Markdown(bus_count_prop_type_desc))\n",
- "make_chart(\n",
- " \"total_bus_count\", \n",
- " \"Bus count by propulsion type\",\n",
- " x_col=\"prop_type\",\n",
- " data=prop_agg\n",
+ "display(\n",
+ " Markdown(\"## What is the breakdown of ZEB Propulsion Type and Bus Size Category?\"),\n",
+ " pivot_size.loc[zeb_list]\n",
")"
]
},
{
"cell_type": "code",
- "execution_count": 8,
- "id": "4f092539-c4c6-4579-aa02-fbee65414ec3",
+ "execution_count": 32,
+ "id": "63f90c48-e28d-4d88-8b90-891a3e3e3681",
"metadata": {
"execution": {
- "iopub.execute_input": "2024-03-29T21:43:45.720708Z",
- "iopub.status.busy": "2024-03-29T21:43:45.719411Z",
- "iopub.status.idle": "2024-03-29T21:43:45.727923Z",
- "shell.execute_reply": "2024-03-29T21:43:45.726938Z"
- },
- "tags": []
+ "iopub.execute_input": "2024-06-26T23:31:44.346496Z",
+ "iopub.status.busy": "2024-06-26T23:31:44.345382Z",
+ "iopub.status.idle": "2024-06-26T23:31:44.355400Z",
+ "shell.execute_reply": "2024-06-26T23:31:44.354301Z"
+ }
},
"outputs": [
{
"data": {
"text/markdown": [
"\n",
- "## Conclusion\n",
- "Based on the findings so far in bus only projects, there is evidence that bus procurement cost vary widely amongst transit agencies all over the country. Non-ZEB bus cost variation was wide. Whereas ZEB cost variation was much tighter. However ZEBs do have a higher cost per bus than non-ZEB.\n",
+ "## **Conclusion**\n",
"\n",
- "Most of the bus only projects were for non-ZEBs. This can be explained by looking into the initial project list. Lots of projects that procured ZEBs also included the installation of chargers and related charging infrastructure. Indicating that transit agencies are still adopting and preparing for ZEBs and need to make the initial investment in the equipment. \n",
- "\n"
+       "Based on these findings, the average cost of a ZEB throughout the US is ~$1,000,000, roughly twice the price of a conventional, non-ZEB bus.\n",
+       "The variance in cost depends mainly on the options the transit\n",
+       "agency chooses. Highly optioned/customized buses contribute to high cost.\n",
+       "Unfortunately, analyzing the cost of configurable options is outside the scope of the data provided. \n"
],
"text/plain": [
""
@@ -380,13 +2133,23 @@
}
],
"source": [
- "display(Markdown(conclusion))"
+ "conclusion = f\"\"\"\n",
+ "## **Conclusion**\n",
+ "\n",
+    "Based on these findings, the average cost of a ZEB throughout the US is ~$1,000,000, roughly twice the price of a conventional, non-ZEB bus.\n",
+    "The variance in cost depends mainly on the options the transit\n",
+    "agency chooses. Highly optioned/customized buses contribute to high cost.\n",
+    "Unfortunately, analyzing the cost of configurable options is outside the scope of the data provided. \n",
+ "\"\"\"\n",
+ "display(\n",
+ " Markdown(conclusion)\n",
+ ")"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "c6ce4e1b-c1a2-40d1-84c1-0a20a4400eb3",
+ "id": "8f8c2bae-652c-4532-8b46-2f4fa7003d65",
"metadata": {},
"outputs": [],
"source": []
@@ -409,7 +2172,10 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
- }
+ },
+ "toc-autonumbering": false,
+ "toc-showcode": true,
+ "toc-showmarkdowntxt": true
},
"nbformat": 4,
"nbformat_minor": 5
diff --git a/bus_procurement_cost/cost_per_bus_cleaner.py b/bus_procurement_cost/cost_per_bus_cleaner.py
index d4f33f8cd..637249926 100644
--- a/bus_procurement_cost/cost_per_bus_cleaner.py
+++ b/bus_procurement_cost/cost_per_bus_cleaner.py
@@ -1,16 +1,17 @@
import pandas as pd
-from fta_data_cleaner import gcs_path
+from bus_cost_utils import *
+from scipy.stats import zscore
-def prepare_data() ->pd.DataFrame:
+
+
+def prepare_all_data() ->pd.DataFrame:
"""
primary function to read-in, merge data across FTA, TIRCP and DGS data.
standardizes columns names, then exports as parquet.
"""
# variables for file names
- # all bus only projects for each dataset
- fta_bus_data = "fta_bus_cost_clean.parquet"
- tircp_bus_data = "clean_tircp_project_bus_only.parquet"
- dgs_bus_data = "dgs_agg_w_options_clean.parquet"
+
+
# dictionary to update columns names
col_dict = {
@@ -28,9 +29,10 @@ def prepare_data() ->pd.DataFrame:
}
# reading in data
- fta = pd.read_parquet(f"{gcs_path}{fta_bus_data}")
- tircp = pd.read_parquet(f"{gcs_path}{tircp_bus_data}")
- dgs = pd.read_parquet(f"{gcs_path}{dgs_bus_data}")
+    # bus only projects for each dataset
+ fta = pd.read_parquet(f"{GCS_PATH}clean_fta_bus_only.parquet")
+ tircp = pd.read_parquet(f"{GCS_PATH}clean_tircp_bus_only.parquet")
+ dgs = pd.read_parquet(f"{GCS_PATH}clean_dgs_bus_only_w_options.parquet")
# adding new column to identify source
fta["source"] = "fta"
@@ -71,13 +73,29 @@ def prepare_data() ->pd.DataFrame:
],
how="outer",
)
+ #normalizing data with cost per bus
+ #calculating cost per bus here
+ merge2["cost_per_bus"] = (merge2["total_cost"] / merge2["bus_count"]).astype("int64")
+ #calculating zscore on cost per bus
+ merge2["zscore_cost_per_bus"] = zscore(merge2["cost_per_bus"])
+
+ #flag any outliers
+ merge2["is_cpb_outlier?"] = merge2["zscore_cost_per_bus"].apply(outlier_flag)
return merge2
+
+
+
if __name__ == "__main__":
# initial df
- df1 = prepare_data()
+ df1 = prepare_all_data()
+ #remove outliers based on cost per bus zscore
+ df2 = df1[df1["is_cpb_outlier?"]==False]
# export to gcs
- df1.to_parquet(f'{gcs_path}cpb_analysis_data_merge.parquet')
\ No newline at end of file
+ #full data, with outliers
+ df1.to_parquet(f'{GCS_PATH}cleaned_cpb_analysis_data_merge.parquet')
+ # no outliers
+ df2.to_parquet(f'{GCS_PATH}cleaned_no_outliers_cpb_analysis_data_merge.parquet')
\ No newline at end of file
diff --git a/bus_procurement_cost/cost_per_bus_utils.py b/bus_procurement_cost/cost_per_bus_utils.py
deleted file mode 100644
index 65dcb6410..000000000
--- a/bus_procurement_cost/cost_per_bus_utils.py
+++ /dev/null
@@ -1,454 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import seaborn as sns
-import shared_utils
-from matplotlib.ticker import ScalarFormatter
-from scipy.stats import zscore
-
-def overall_cpb(df: pd.DataFrame) -> pd.DataFrame:
- """
- function to calculate cpb on overall dataframe.
- """
- # copy of df
- df1 = df.copy()
-
- # add new column for cost per bus (cpb)
- df1['cpb'] = (df1['total_cost'] / df1['bus_count']).astype("int64")
-
- return df1
-
-def get_zscore(df: pd.DataFrame) -> pd.DataFrame:
- """
- seperate function to calculate zscore.
- """
- # add new column for z-score
- df1 = df.copy()
-
- df1["zscore_cost_per_bus"] = zscore(df1["cpb"])
-
- return df1
-
-def remove_outliers(df: pd.DataFrame, zscore_col: int) -> pd.DataFrame:
- """
- function to remove zscore outliers from data.
- keeps rows with ascore -3>x<3
-
- """
- df1 = df[
- (df[zscore_col] >= -3) & (df[zscore_col] <= 3)
- ]
- return df1
-
-def cpb_zscore_outliers(df: pd.DataFrame) -> pd.DataFrame:
- """
- function that calculated cost per bus col, z-score col, then removes outliers(remove rows with zscore >3)
- """
- df = overall_cpb(df)
- df = get_zscore(df)
- df1 = remove_outliers(df, "zscore_cost_per_bus")
-
- return df1
-
-
-def cpb_aggregate(df: pd.DataFrame, column: str) -> pd.DataFrame:
- """
- function to aggregate compiled data by different categories (transit agency, propulsion type, size type).
- aggregate on columns:
- "project_title"
- "ppno"
- "total_cost"
- "bus_count"
-
- Then, cost per bus is calculated AFTER the aggregation.
- """
- df_agg = (
- df.groupby(column)
- .agg(
- total_project_count=("project_title", "count"),
- total_project_count_ppno=("ppno", "count"),
- total_agg_cost=("total_cost", "sum"),
- total_bus_count=("bus_count", "sum"),
- )
- .reset_index()
- )
- df_agg["cpb"] = (df_agg["total_agg_cost"] / df_agg["total_bus_count"]).astype("int64")
- return df_agg
-
-def zeb_only_df(df: pd.DataFrame) -> pd.DataFrame:
- """
- filters df to only show rows that are zero-emission buses (ZEB).
- """
- zeb_list =[
- 'BEB',
- #'CNG',
- 'FCEB',
- 'electric (not specified)',
- #'ethanol',
- #'low emission (hybrid)',
- #'low emission (propane)',
- #'mix (diesel and gas)',
- #'mix (zero and low emission)',
- #'not specified',
- 'zero-emission bus (not specified)'
- ]
- df1 = df.copy()
-
- df1 = df1[df1["prop_type"].isin(zeb_list)]
-
- return df1
-
-def non_zeb_only_df(df: pd.DataFrame) -> pd.DataFrame:
- non_zeb_list =[
- 'CNG',
- 'ethanol',
- 'low emission (hybrid)',
- 'low emission (propane)',
- 'mix (diesel and gas)',
- 'mix (zero and low emission)',
- ]
-
- df1 = df.copy()
-
- df1 = df1[df1["prop_type"].isin(non_zeb_list)]
-
- return df1
-
-def dist_curve(
- df: pd.DataFrame,
- mean: str,
- std: str,
- title: str,
- xlabel: str,
-):
- """
- function to make distribution curve. uses the "cpb" column of the df.
- """
- sns.histplot(df["cpb"], kde=True, color="skyblue", bins=20)
- # mean line
- plt.axvline(
- mean, color="red", linestyle="dashed", linewidth=2, label=f"Mean: ${mean:,.2f}"
- )
- # mean+1std
- plt.axvline(
- mean + std,
- color="green",
- linestyle="dashed",
- linewidth=2,
- label=f"Standard Deviation: ${std:,.2f}",
- )
- plt.axvline(mean - std, color="green", linestyle="dashed", linewidth=2)
- plt.axvline(mean + (std * 2), color="green", linestyle="dashed", linewidth=2)
- plt.axvline(mean + (std * 3), color="green", linestyle="dashed", linewidth=2)
-
- plt.title(title + " with Mean and Standard Deviation")
- plt.xlabel(xlabel)
- plt.ylabel("Frequency")
-
- # Turn off scientific notation on x-axis?
- plt.gca().xaxis.set_major_formatter(ScalarFormatter(useMathText=False))
-
- plt.legend()
- plt.show()
-
- return
-
-def make_chart(y_col: str, title: str, data: pd.DataFrame, x_col: str):
- """
- function to create chart. sorts values by y_col ascending."""
-
- data.sort_values(by=y_col, ascending=False).head(10).plot(
- x=x_col, y=y_col, kind="bar", color="skyblue"
- )
- plt.title(title)
- plt.xlabel(x_col)
- plt.ylabel(y_col)
-
- plt.ticklabel_format(style="plain", axis="y")
- plt.show()
-
-### VARIABLES
-
-#INITIAL DF AGG VARIABLES
-
-# initial, overall df
-all_bus = pd.read_parquet(
- "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/cpb_analysis_data_merge.parquet"
-)
-
-# count of all projects from each source
-def all_project_counter(fta_file: str, tircp_file:str, dgs_file: str) -> int:
- """
- function to count all the projects from fta, tircp and dgs files.
- use to find the total number of projects and the total number of bus only projects
- """
- gcs_path = "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/"
-
-
- all_fta = len(pd.read_parquet(f"{gcs_path}{fta_file}"))
- all_tircp = len(pd.read_parquet(f"{gcs_path}{tircp_file}"))
- all_dgs = len(pd.read_parquet(f"{gcs_path}{dgs_file}"))
-
- count_all_projects = all_fta+all_tircp+all_dgs
-
- return count_all_projects
-
-all_project_count = all_project_counter(
- fta_file = "fta_all_projects_clean.parquet",
- tircp_file = "clean_tircp_project.parquet",
- dgs_file = "dgs_agg_clean.parquet"
-)
-
-bus_only_project_count = all_project_counter(
- fta_file = "fta_bus_cost_clean.parquet",
- tircp_file = "clean_tircp_project_bus_only.parquet",
- dgs_file = "dgs_agg_clean.parquet"
-)
-
-#count of all bus only projects per dataset
-bus_only_count_fta = len(pd.read_parquet(
- "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_bus_cost_clean.parquet"))
-bus_only_count_tircp = len(pd.read_parquet(
- "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/clean_tircp_project_bus_only.parquet"))
-bus_only_count_dgs = len(pd.read_parquet(
- "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/dgs_agg_clean.parquet"))
-
-#count of all projects per dataset
-count_all_fta = len(pd.read_parquet("gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_all_projects_clean.parquet"))
-count_all_tircp = len(pd.read_parquet("gs://calitp-analytics-data/data-analyses/bus_procurement_cost/clean_tircp_project.parquet"))
-count_all_dgs = len(pd.read_parquet("gs://calitp-analytics-data/data-analyses/bus_procurement_cost/dgs_agg_clean.parquet"))
-
-# Variables
-all_bus_only_projects = len(all_bus)
-total_bus_count = sum(all_bus.bus_count)
-total_funding = sum(all_bus.total_cost)
-
-
-
-## ALL BUS
-
-# initial df with cpb col
-#all_bus_cpb = overall_cpb(all_bus)
-
-# get zscore
-#cpb_zscore = get_zscore(all_bus_cpb)
-
-# initial df with cpb/zscore, remove outliers
-no_outliers = cpb_zscore_outliers(all_bus)
-
-
-# aggregate by transit agency
-agency_agg = cpb_aggregate(no_outliers, "transit_agency")
-
-# aggregate by prop type
-prop_agg = cpb_aggregate(no_outliers, "prop_type")
-
-# aggregate by bus size
-size_agg = cpb_aggregate(no_outliers, "bus_size_type")
-
-min_bus_cost = no_outliers.cpb.min()
-max_bus_cost = no_outliers.cpb.max()
-max_bus_count = no_outliers.bus_count.max()
-
-
-#how many zeb and non-zeb bus
-zeb_list =[
- "BEB",
- "FCEB",
- "electric (not specified)",
- "zero-emission bus (not specified)",
-]
-
-non_zeb_list =[
- "CNG",
- "ethanol",
- "low emission (hybrid)",
- "low emission (propane)",
- "mix (zero and low emission)",
-]
-just_zeb_count = prop_agg[prop_agg["prop_type"].isin(zeb_list)]["total_bus_count"].sum()
-just_non_zeb_count = prop_agg[prop_agg["prop_type"].isin(non_zeb_list)]["total_bus_count"].sum()
-
-
-# VARIABLES
-cpb_mean = no_outliers.cpb.mean()
-cpb_std = no_outliers.cpb.std()
-
-# agency with highest bus count
-agency_with_most_bus = no_outliers.loc[
- no_outliers["bus_count"].idxmax(), "transit_agency"
-]
-
-# propulsion type max count and name
-prop_type_name_max_freq = no_outliers["prop_type"].value_counts().idxmax()
-prop_type_max = no_outliers["prop_type"].value_counts().max()
-
-# prop type min count and anme
-prop_type_name_min_freq = no_outliers["prop_type"].value_counts().idxmin()
-prop_type_min = no_outliers["prop_type"].value_counts().min()
-
-# how many buses do they have? already answered
-agency_with_highest_funds = no_outliers.loc[
- all_bus["total_cost"].idxmax(), "transit_agency"
-]
-
-# what is the highest amount? already answered
-agency_max_cpb = no_outliers.loc[no_outliers["cpb"].idxmax(), "transit_agency"]
-agency_min_cpb = no_outliers.loc[no_outliers["cpb"].idxmin(), "transit_agency"]
-prop_type_max_cpb = no_outliers.loc[no_outliers["cpb"].idxmax(), "prop_type"]
-prop_type_min_cpb = no_outliers.loc[no_outliers["cpb"].idxmin(), "prop_type"]
-
-## ZEB ONLY VARIABLES
-
-# zeb only df
-zeb_only = zeb_only_df(all_bus)
-
-# calc cpb
-#zeb_cpb = overall_cpb(zeb_only)
-
-# get cpb, zscore, remove outliers
-zeb_no_outliers = cpb_zscore_outliers(zeb_only)
-
-# remove outliers
-#zeb_no_outliers = remove_outliers(zeb_zscore, "zscore_cost_per_bus")
-
-# aggregate by transit agency
-zeb_agency_agg = cpb_aggregate(zeb_no_outliers, "transit_agency")
-
-# aggregate by prop type
-zeb_prop_agg = cpb_aggregate(zeb_no_outliers, "prop_type")
-
-# aggregate by bus size
-zeb_size_agg = cpb_aggregate(zeb_no_outliers, "bus_size_type")
-
-# VARIABLES
-zeb_count = len(zeb_no_outliers.prop_type)
-
-# zeb only, no outliers cpb curve
-zeb_only_mean = zeb_no_outliers.cpb.mean()
-zeb_only_std = zeb_no_outliers.cpb.std()
-
-## NON-ZEB VARIABLES
-
-# no zeb df
-non_zeb_only = non_zeb_only_df(all_bus)
-
-# calc cpb
-#non_zeb_cpb = overall_cpb(non_zeb_only)
-
-# get zscore
-#non_zeb_zscore = get_zscore(non_zeb_cpb)
-
-# get cpb, zscore, remove outliers
-non_zeb_no_outliers = cpb_zscore_outliers(non_zeb_only)
-
-# aggregate by transit agency
-non_zeb_agency_agg = cpb_aggregate(non_zeb_no_outliers, "transit_agency")
-
-# aggregate by prop type
-non_zeb_prop_agg = cpb_aggregate(non_zeb_no_outliers, "prop_type")
-
-# aggregate by bus size
-non_zeb_size_agg = cpb_aggregate(non_zeb_no_outliers, "bus_size_type")
-
-# VARIABLES
-non_zeb_count = len(non_zeb_no_outliers.prop_type)
-
-# non-zeb cpb mean and std dev
-non_zeb_only_mean = non_zeb_no_outliers.cpb.mean()
-non_zeb_only_std = non_zeb_no_outliers.cpb.std()
-
-# start summary narative
-summary = f"""
-## Summary
-This analysis examines the cost of buses for transit agencies across the county. Specifically, to observe the variation of bus cost for propulsion type and bus sizes.
-
-Data was compiled from three data sources:
-
-1. {count_all_fta} projects from FTA Bus and Low- and No-Emission Grant Awards press release (federally funded, nationwide data)
-2. {count_all_tircp} projects TIRCP project data (state-funded, California only)
-3. {count_all_dgs} projects DGS usage report for all procurements from California agencies purchasing from New Flyer and Portera Inc..
-
-The compiled dataset includes **{all_project_count}** total transit related projects. However, the initial dataset included projects that encompassed bus procurement and other components such as charging installation and facility construction, as well as non-bus related projects (ferries, trains). The dataset was filtered to exclude projects that were not bus related, indicated 0 buses procured, and projects that contained construction/installation work. **{bus_only_project_count}** projects remained that specified the number of buses to procure and explicitly described procuring buses (bus only projects).
-
-Number of bus only contracts from each dataset
-- FTA: **{bus_only_count_fta}**
-- TIRCP: **{bus_only_count_tircp}**
-- DGS: **{bus_only_count_dgs}**
-
-
-The remaining bus only projects were categorized into different propulsion types and bus sizes, a “cost per bus” value was calculated, and outliers removed.
-
-A overall summary is provided below:
-- Total projects: **298**
-- Number of projects with mix bus procurement and other components, also non-bus projects: **204**
-- Number of bus only projects: **{bus_only_project_count}**
-- Total dollars awarded to bus only projects: **`${total_funding:,.2f}`**
-- Total number of buses: **{total_bus_count}**
-- Most common propulsion type procured for bus only projects: **{prop_type_name_max_freq}** at **{prop_type_max}** projects
-- Number of ZEB buses* procured: **{just_zeb_count}**
-- Number of non-ZEB buses** procured: **{just_non_zeb_count}**
-- Overall average cost per bus (ZEB & non-ZEB) is `${cpb_mean:,.2f}` (std `${cpb_std:,.2f}`)
-- ZEB average cost per bus is `${zeb_only_mean:,.2f}` (std `${zeb_only_std:,.2f}`)
-- Non-ZEB average cost per bus is `${non_zeb_only_mean:,.2f}` (std `${non_zeb_only_std:,.2f}`)
-
-`*`ZEB buses include: zero-emission (not specified), electric (not specified), battery electric, fuel cell electric
-
-`**`Non-ZEB buses include: CNG, ethanol, low emission (hybrid, propane), diesel, gas.
-
-
-Below are key charts that visualize more findings:
-
-
-"""
-
-all_bus_desc = """
-## All buses (ZEB and non-ZEB) cost/bus distribution curve.
-This chart shows the cost per bus distribution of all bus only projects.
-"""
-
-# ZEB only, cpb distribution
-zeb_desc = """
-## ZEB only cost/bus Distribution Chart.
-Chart of projects with zero-emission, electric, battery electric, hydrogen fuel cell bus procurements.
-"""
-
-# non-ZEB
-non_zeb_desc = """
-## non-ZEB cost/bus Distribution.
-Chart of projects with non-ZEB bus procurement (hybrids, diesel, cng)
-This distrubtion is wider than the ZEB projects."""
-
-#highest cpb agency
-highest_cpb_desc = """
-## Highest Cost per Bus by Transit Agency
-SFMTA is the agency with the highest cost per bus of all agencies in the analysis
-"""
-
-# Highest awarded agency
-highest_award = """
-## Most funds Awarded by Transit Agency
-LA Metro was awarded almost double the next agency. Followed by SFMTA"""
-
-# most buses
-most_bus = """
-## Highest Bus Count by Agency.
-LA Metro plans to procure the most buses."""
-
-#prop_type cpb
-cpb_prop_type_desc = """
-## Cost per bus by propulsion type.
-"""
-
-#prop_type bus coutn
-bus_count_prop_type_desc = """
-## Bus count by propulsion type.
-"""
-
-conclusion = """
-## Conclusion
-Based on the findings so far in bus only projects, there is evidence that bus procurement cost vary widely amongst transit agencies all over the country. Non-ZEB bus cost variation was wide. Whereas ZEB cost variation was much tighter. However ZEBs do have a higher cost per bus than non-ZEB.
-
-Most of the bus only projects were for non-ZEBs. This can be explained by looking into the initial project list. Lots of projects that procured ZEBs also included the installation of chargers and related charging infrastructure. Indicating that transit agencies are still adopting and preparing for ZEBs and need to make the initial investment in the equipment.
-
-"""
\ No newline at end of file
diff --git a/bus_procurement_cost/dgs_data_cleaner.py b/bus_procurement_cost/dgs_data_cleaner.py
index 8cd27c401..f55a2e768 100644
--- a/bus_procurement_cost/dgs_data_cleaner.py
+++ b/bus_procurement_cost/dgs_data_cleaner.py
@@ -2,7 +2,7 @@
import pandas as pd
import shared_utils
from calitp_data_analysis.sql import to_snakecase
-
+from bus_cost_utils import *
def calculate_total_cost(row):
"""
@@ -14,246 +14,18 @@ def calculate_total_cost(row):
return row["total_with_options_per_unit"] * row["quantity"]
else:
return row["contract_unit_price"] * row["quantity"]
-
-
-def new_bus_size_finder(description: str) -> str:
- """
- Similar to prop_type_find, matches keywords to item description col and return standardized bus size type.
- now includes variable that make description input lowercase.
- To be used with .assign()
- """
-
- articulated_list = [
- "60 foot",
- "articulated",
- ]
-
- standard_bus_list = [
- "30 foot",
- "35 foot",
- "40 foot",
- "40ft",
- "45 foot",
- "standard",
- ]
-
- cutaway_list = [
- "cutaway",
- ]
-
- other_bus_size_list = ["feeder bus"]
-
- otr_bus_list = [
- "coach style",
- "over the road",
- ]
-
- item_description = description.lower().replace("-", " ").strip()
-
- if any(word in item_description for word in articulated_list):
- return "articulated"
-
- elif any(word in item_description for word in standard_bus_list):
- return "standard/conventional (30ft-45ft)"
-
- elif any(word in item_description for word in cutaway_list):
- return "cutaway"
-
- elif any(word in item_description for word in otr_bus_list):
- return "over-the-road"
-
- elif any(word in item_description for word in other_bus_size_list):
- return "other"
-
- else:
- return "not specified"
-
-
-# new prop_finder function
-def new_prop_finder(description: str) -> str:
- """
- function that matches keywords from each propulsion type list against the item description col, returns a standardized prop type
- now includes variable that make description input lowercase.
- to be used with .assign()
- """
-
- BEB_list = [
- "battery electric",
- "BEBs paratransit buses"
- ]
-
- cng_list = [
- "cng",
- "compressed natural gas"
-
- ]
-
- electric_list = [
- "electric buses",
- "electric commuter",
- "electric",
- ]
-
- FCEB_list = [
- "fuel cell",
- "hydrogen",
- #"fuel cell electric",
- #"hydrogen fuel cell",
- #"fuel cell electric bus",
- #"hydrogen electric bus",
- ]
-
- # low emission (hybrid)
- hybrid_list = [
- #"diesel electric hybrids",
- #"diesel-electric hybrids",
- #"hybrid electric",
- #"hybrid electric buses",
- #"hybrid electrics",
- "hybrids",
- "hybrid",
- ]
-
- # low emission (propane)
- propane_list = [
- #"propane buses",
- #"propaned powered vehicles",
- "propane",
- ]
-
- mix_beb_list = [
- "2 BEBs and 4 hydrogen fuel cell buses",
- ]
-
- mix_lowe_list = [
- "diesel and gas",
- ]
-
- mix_zero_low_list = [
- "15 electic, 16 hybrid",
- "4 fuel cell / 3 CNG",
- "estimated-cutaway vans (PM- award will not fund 68 buses",
- "1:CNGbus ;2 cutaway CNG buses",
- ]
-
- zero_e_list = [
- #"zero emission buses",
- #"zero emission electric",
- #"zero emission vehicles",
- "zero-emission",
- "zero emission",
- ]
-
- item_description = description.lower().replace("‐", " ").strip()
-
- if any(word in item_description for word in BEB_list) and not any(
- word in item_description for word in ["diesel", "hybrid", "fuel cell"]
- ):
- return "BEB"
-
- elif any(word in item_description for word in FCEB_list):
- return "FCEB"
-
- elif any(word in item_description for word in hybrid_list):
- return "low emission (hybrid)"
-
- elif any(word in item_description for word in mix_beb_list):
- return "mix (BEB and FCEB)"
-
- elif any(word in item_description for word in mix_lowe_list):
- return "mix (low emission)"
-
- elif any(word in item_description for word in mix_zero_low_list):
- return "mix (zero and low emission)"
-
- elif any(word in item_description for word in zero_e_list):
- return "zero-emission bus (not specified)"
-
- elif any(word in item_description for word in propane_list):
- return "low emission (propane)"
-
- elif any(word in item_description for word in electric_list):
- return "electric (not specified)"
-
- elif any(word in item_description for word in cng_list):
- return "CNG"
-
- else:
- return "not specified"
-
-#project type checker
-def project_type_checker(description: str) -> str:
- """
- function to match keywords to project description col to identy projects that only have bus procurement.
- used to identify projects into diffferent categories: bus only, bus + others, no bus procurement.
- use with .assign() to get a new col.
- """
- bus_list =[
- "bus",
- "transit vehicles",# for fta list
- "cutaway vehicles",# for fta list
- "zero-emission vehicles", # for tircp list
- "zero emission vehicles",
- "zero‐emissions vans",
- "hybrid-electric vehicles",
- "battery-electric vehicles",
- "buy new replacement vehicles", # specific string for fta list
- ]
-
- exclude_list =[
- "facility",
- #"station",
- "stops",
- "installation",
- "depot",
- "construct",
- "infrastructure",
- "signal priority",
- "improvements",
- "build",
- "chargers",
- "charging equipment",
- "install",
- "rail",
- "garage",
- "facilities",
- "bus washing system",
- "build a regional transit hub" # specific string needed for fta list
- #"associated infrastructure" may need to look at what is associated infrastructure is for ZEB
-
- ]
- proj_description = description.lower().strip()
-
- if any(word in proj_description for word in bus_list) and not any(
- word in proj_description for word in exclude_list
- ):
- return "bus only"
-
- elif any(word in proj_description for word in exclude_list) and not any(
- word in proj_description for word in bus_list
- ):
- return "non-bus components"
- elif any(word in proj_description for word in exclude_list) and any(
- word in proj_description for word in bus_list
- ):
- return "includes bus and non-bus components"
-
- else:
- return "needs review"
-
-# included assign columns
def clean_dgs_columns() -> pd.DataFrame:
"""
reads in 2 dgs sheets, adds source column, merges both DFs, snakecase columns, update dtypes for monetary columns.
merged first becaues the snakecase function messes with the dtypes for some reason
"""
- from fta_data_cleaner import gcs_path
+
# params
- file_17c = "17c compiled-Proterra Compiled Contract Usage Report .xlsx"
- file_17b = "17b compiled.xlsx"
+ file_17c = "raw_17c compiled-Proterra Compiled Contract Usage Report .xlsx"
+ file_17b = "raw_17b compiled.xlsx"
sheet_17c = "Proterra "
sheet_17b = "Usage Report Template"
@@ -291,8 +63,8 @@ def clean_dgs_columns() -> pd.DataFrame:
]
# read in data
- dgs_17c = pd.read_excel(f"{gcs_path}{file_17c}", sheet_name=sheet_17c)
- dgs_17b = pd.read_excel(f"{gcs_path}{file_17b}", sheet_name=sheet_17b)
+ dgs_17c = pd.read_excel(f"{GCS_PATH}{file_17c}", sheet_name=sheet_17c)
+ dgs_17b = pd.read_excel(f"{GCS_PATH}{file_17b}", sheet_name=sheet_17b)
# add new column to identify source
dgs_17c["source"] = "17c"
@@ -319,7 +91,7 @@ def clean_dgs_columns() -> pd.DataFrame:
return dgs_17bc2
-def agg_by_agency(df: pd.DataFrame) -> pd.DataFrame:
+def dgs_agg_by_agency(df: pd.DataFrame) -> pd.DataFrame:
"""
function that aggregates the DGS data frame by transit agency and purchase order number (PPNO) to get total cost of just buses without options.
first, dataframe is filtered for rows containing buses (does not include rows with 'not specified').
@@ -363,8 +135,7 @@ def agg_by_agency(df: pd.DataFrame) -> pd.DataFrame:
return agg_agency_bus_count3
-
-def agg_by_agency_w_options(df: pd.DataFrame) -> pd.DataFrame:
+def dgs_agg_by_agency_w_options(df: pd.DataFrame) -> pd.DataFrame:
"""
similar to the previous function, aggregates the DGS dataframe by transit agency to get total cost of buses with options.
agencies may order buses with different configurations, resulting in different total cost.
@@ -403,16 +174,16 @@ def agg_by_agency_w_options(df: pd.DataFrame) -> pd.DataFrame:
if __name__ == "__main__":
- from fta_data_cleaner import gcs_path
+
# initial df
df1 = clean_dgs_columns()
#df of just bus cost (no options)
- just_bus = agg_by_agency(df1)
+ just_bus = dgs_agg_by_agency(df1)
#df of bus cost+options
- bus_w_options = agg_by_agency_w_options(df1)
+ bus_w_options = dgs_agg_by_agency_w_options(df1)
#export serperate df's as parquet to GCS
- just_bus.to_parquet(f'{gcs_path}dgs_agg_clean.parquet')
- bus_w_options.to_parquet(f'{gcs_path}dgs_agg_w_options_clean.parquet')
\ No newline at end of file
+ just_bus.to_parquet(f'{GCS_PATH}clean_dgs_all_projects.parquet')
+ bus_w_options.to_parquet(f'{GCS_PATH}clean_dgs_bus_only_w_options.parquet')
\ No newline at end of file
diff --git a/bus_procurement_cost/dgs_usage_report_bus_analysis.ipynb b/bus_procurement_cost/dgs_usage_report_bus_analysis.ipynb
deleted file mode 100644
index cb847b29b..000000000
--- a/bus_procurement_cost/dgs_usage_report_bus_analysis.ipynb
+++ /dev/null
@@ -1,813 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "d4cd3d09-86ee-439c-bb3d-081b369f48bd",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "import matplotlib.pyplot as plt\n",
- "from matplotlib.ticker import ScalarFormatter\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "import seaborn as sns\n",
- "import shared_utils\n",
- "from scipy.stats import zscore\n",
- "\n",
- "# set_option to increase max rows displayed to 200, to see entire df in 1 go/\n",
- "pd.set_option(\"display.max_rows\", 200)\n",
- "\n",
- "# function to display df info\n",
- "def df_peek(df):\n",
- " display(type(df), df.shape, df.dtypes)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a56fb4a4-2d26-4623-89d4-d4d73e6dc9c0",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Read in Raw Data\n",
- "17C and 17B via excels"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "3fc06efc-e6aa-42dd-945c-1719d27ba1a1",
- "metadata": {},
- "outputs": [],
- "source": [
- "GCS_FILE_PATH ='gs://calitp-analytics-data/data-analyses/bus_procurement_cost/'\n",
- "file_17c = '17c compiled-Proterra Compiled Contract Usage Report .xlsx'\n",
- "file_17b = '17b compiled.xlsx'\n",
- "sheet_17c = 'Proterra '\n",
- "sheet_17b = 'Usage Report Template'\n",
- "\n",
- "def get_data(path, file, sheet):\n",
- " df = pd.read_excel(path + file, sheet_name=sheet)\n",
- " \n",
- " return df\n",
- "\n",
- "dgs_17c = get_data(GCS_FILE_PATH, file_17c, sheet_17c)\n",
- "dgs_17b = get_data(GCS_FILE_PATH, file_17b, sheet_17b)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "6c03e147-68df-4c9e-b150-da4ef7f35652",
- "metadata": {},
- "source": [
- "## Merge data frames"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "f657e2e7-b43f-46aa-870c-8c3ab0bc8404",
- "metadata": {},
- "outputs": [
- {
- "ename": "NameError",
- "evalue": "name 'dgs_17c' is not defined",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[3], line 23\u001b[0m\n\u001b[1;32m 1\u001b[0m merge_col\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSupplier Contract Usage ID\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mOrdering Agency Name\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mState (S) or Local (L) agency\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mExtended Contract Price Paid\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 21\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msource\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m---> 23\u001b[0m dgs_17bc \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mmerge(dgs_17b, \u001b[43mdgs_17c\u001b[49m, how\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mouter\u001b[39m\u001b[38;5;124m'\u001b[39m, on\u001b[38;5;241m=\u001b[39m merge_col)\u001b[38;5;241m.\u001b[39mfillna(\u001b[38;5;241m0\u001b[39m)\n",
- "\u001b[0;31mNameError\u001b[0m: name 'dgs_17c' is not defined"
- ]
- }
- ],
- "source": [
- "merge_col=['Supplier Contract Usage ID',\n",
- " 'Ordering Agency Name',\n",
- " 'State (S) or Local (L) agency',\n",
- " 'Purchasing Authority Number (for State departments)',\n",
- " 'Agency Billing Code',\n",
- " 'Purchase Order Number',\n",
- " 'Purchase Order Date',\n",
- " 'Delivery Date',\n",
- " 'Contract Line Item Number (CLIN) (RFP ID)',\n",
- " 'UNSPSC Code\\n(Version 10)',\n",
- " 'Manufacturer Part Number (OEM #)',\n",
- " 'Manufacturer (OEM)',\n",
- " 'Item Description',\n",
- " 'Unit of Measure',\n",
- " 'Quantity in \\nUnit of Measure\\n',\n",
- " 'Quantity',\n",
- " 'List Price/MSRP',\n",
- " 'Index Date / Catalog Version',\n",
- " 'Contract Unit Price',\n",
- " 'Extended Contract Price Paid',\n",
- " 'source']\n",
- "\n",
- "dgs_17bc = pd.merge(dgs_17b, dgs_17c, how='outer', on= merge_col).fillna(0)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7bab257b-5715-4dfb-9b08-dddb19c05d23",
- "metadata": {},
- "source": [
- "## Data Cleaning and QC"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "30d97c40-8b3e-4d7a-955e-8ae4301c8602",
- "metadata": {},
- "outputs": [],
- "source": [
- "#snake case columns\n",
- "def snake_case(df):\n",
- " df.columns = df.columns.str.lower().str.replace(' ', '_').str.strip()\n",
- " \n",
- "snake_case(dgs_17bc)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "cb5aba84-349f-4ae0-9d22-2d4024b1e41f",
- "metadata": {},
- "outputs": [],
- "source": [
- "# check financial columns to be `int64`\n",
- "money =['contract_unit_price',\n",
- " 'extended_contract_price_paid',\n",
- " 'total_with_options_per_unit',\n",
- " 'grand_total']\n",
- "\n",
- "# loop that takes money list to convert to int64 dtype\n",
- "for column in money:\n",
- " dgs_17bc[column] = dgs_17bc[column].astype('int64')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "7e8378dd-f0bb-42b0-8582-a99be893dc0c",
- "metadata": {},
- "outputs": [],
- "source": [
- "# drop unnessary columns?\n",
- "drops =['supplier_contract_usage_id',\n",
- " 'state_(s)_or_local_(l)_agency',\n",
- " 'purchasing_authority_number____________________(for_state_departments)',\n",
- " 'agency_billing_code',\n",
- " 'unspsc_code\\n(version_10)',\n",
- " 'unit_of_measure',\n",
- " 'epp_(y/n)_x',\n",
- " 'epp_(y/n)_y',\n",
- " 'list_price/msrp',\n",
- " 'index_date_/_catalog_version',\n",
- " 'core/_noncore',\n",
- " 'group_id/_segment_id']\n",
- "\n",
- "dgs_17bc.drop(columns=drops, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b106dfff-eb1c-46ab-8805-ab362da92366",
- "metadata": {},
- "outputs": [],
- "source": [
- "# new column for total cost\n",
- "# 17b >> `grand total` = total_with_options * quanity\n",
- "# 17c >> `extended contract price paid` = contract unit price * quanity\n",
- "\n",
- "# what im trying to do: create a new column called \"total_cost\". for each row, if `totals_with_options_per_unit` is >=0, then multiply `totals_with_options_per_unit` by `quanity'\n",
- "# if 0, then multiple `contract_unit_price` by `quanity`.\n",
- "\n",
- "# df['total_cost'] = df['quanity'] * df['total_with_options'] or df['contract unit price']???\n",
- "\n",
- "def calculate_total_cost(row):\n",
- " if row['total_with_options_per_unit'] > 0:\n",
- " return row['total_with_options_per_unit'] * row['quantity']\n",
- " else:\n",
- " return row['contract_unit_price'] * row['quantity']\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a5356705-c2bb-4945-95b5-03f709219287",
- "metadata": {},
- "outputs": [],
- "source": [
- "# new colum for total cost\n",
- "\n",
- "dgs_17bc['total_cost'] = dgs_17bc.apply(calculate_total_cost, axis=1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "5ab1ba22-5200-44d7-b40f-400aa8efcf76",
- "metadata": {},
- "outputs": [],
- "source": [
- "# comparing totals columns to new `total_cost` column to see if logic works\n",
- "# 17b = Grand total\n",
- "# 17c = Extended Contract Price Paid\n",
- "keep_col=['ordering_agency_name',\n",
- " 'purchase_order_number',\n",
- " 'item_description',\n",
- " 'source',\n",
- " 'grand_total',\n",
- " 'extended_contract_price_paid',\n",
- " 'total_cost']\n",
- "\n",
- "col_compare = dgs_17bc[keep_col]\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "df7343dc-e164-40e3-a512-32edd09d53d4",
- "metadata": {},
- "outputs": [],
- "source": [
- "# new column for prop_type\n",
- "\n",
- "prop_list = ['Battery Electric Bus',\n",
- " 'battery electric bus',\n",
- " 'Fuel Cell Electric Bus',\n",
- " 'fuel cell electric bus',\n",
- " 'Hydrogen Electic Bus',\n",
- " 'hydrogen electric bus',\n",
- " 'battery electric',\n",
- " ]\n",
- "\n",
- "# function to match keywords to list\n",
- "def prop_type_finder(description):\n",
- " for keyword in prop_list:\n",
- " if keyword in description:\n",
- " return keyword\n",
- " return \"not specified\"\n",
- "\n",
- "# add new col `prop_type`, fill it with values based on project_description using prop_type_finder function\n",
- "dgs_17bc[\"prop_type\"] = dgs_17bc[\"item_description\"].apply(prop_type_finder)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6048c764-8cc0-4e57-9f35-93626eb5167d",
- "metadata": {},
- "outputs": [],
- "source": [
- "# new column for bus size type\n",
- "\n",
- "size_list =['35 Foot',\n",
- " '40 Foot',\n",
- " '60 foot',\n",
- " '40 foot',\n",
- " '35 foot',\n",
- " ]\n",
- "\n",
- "def bus_size_finder(description):\n",
- " for keyword in size_list:\n",
- " if keyword in description:\n",
- " return keyword\n",
- " return \"not specified\"\n",
- "\n",
- "dgs_17bc[\"bus_size_type\"] = dgs_17bc[\"item_description\"].apply(bus_size_finder)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b2ed8168-1b24-444f-9f53-ba6a6d911ed9",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Aggregate by Agency\n",
- "need a df that:\n",
- "1. each row is an agency\n",
- "2. total quantity of buses only (not manuals, equipment, part, warranty)\n",
- "3. aggregates total cost for the agency (bus, manals, etc)\n",
- "4. keep the prop_type of the bus (should be no 'not specified')\n",
- "5. keep the bus_size_type for the bus (no 'not specified')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 75,
- "id": "62793376-d7c4-4d59-8249-56c0b18a123a",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(30, 6)"
- ]
- },
- "execution_count": 75,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# agency bus count\n",
- "# filtered df by item desc containing 'bus' or 'Bus'\n",
- "agg_agency_bus_count = dgs_17bc[(dgs_17bc['item_description'].str.contains('bus')) | (dgs_17bc['item_description'].str.contains('Bus'))]\n",
- "\n",
- "agg_agency_bus_count = agg_agency_bus_count[['ordering_agency_name',\n",
- " 'item_description',\n",
- " 'quantity',\n",
- " 'source',\n",
- " 'total_cost',\n",
- " 'prop_type',\n",
- " 'bus_size_type']]\n",
- "\n",
- "#i think this is it.. the numbers are matching up\n",
- "agg_agency_bus_count = agg_agency_bus_count.groupby('ordering_agency_name').agg({\n",
- " 'quantity':'sum',\n",
- " 'total_cost':'sum',\n",
- " 'prop_type':'max',\n",
- " 'bus_size_type':'max',\n",
- " 'source':'max',\n",
- "}).reset_index()\n",
- "\n",
- "\n",
- "# looks good. manualy double checked agsint pivot tables in excel. GOOD TO GO\n",
- " "
- ]
- },
- {
- "cell_type": "markdown",
- "id": "8b842838-6faa-45d1-b4e1-4b757a4a3045",
- "metadata": {},
- "source": [
- "## Export Cleaned data\n",
- "save out as parquet"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 76,
- "id": "4a8c202e-d6bd-463c-bcb6-7a4fe704d0da",
- "metadata": {},
- "outputs": [],
- "source": [
- "agg_agency_bus_count.to_parquet('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/dgs_agg_clean.parquet')"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "85153b50-0f1f-433b-b320-5d6d70a8bc71",
- "metadata": {},
- "source": [
- "## Test to read in parquet"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "9456bbc8-506b-4331-ae17-ab648ce959b6",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(30, 6)"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "url= 'gs://calitp-analytics-data/data-analyses/bus_procurement_cost/dgs_agg_clean.parquet'\n",
- "dgs = pd.read_parquet(url)\n",
- "\n",
- "dgs.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "cf1a46bc-4b08-445e-8ea0-4c729f96b0d4",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
ordering_agency_name
\n",
- "
quantity
\n",
- "
total_cost
\n",
- "
prop_type
\n",
- "
bus_size_type
\n",
- "
source
\n",
- "
\n",
- " \n",
- " \n",
- "
\n",
- "
0
\n",
- "
Alameda County Transit Authority
\n",
- "
20
\n",
- "
22846640
\n",
- "
hydrogen electric bus
\n",
- "
40 foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
1
\n",
- "
CITY OF PORTERVILLE (PORTERVILLE, CA)
\n",
- "
3
\n",
- "
2781891
\n",
- "
battery electric bus
\n",
- "
35 foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
2
\n",
- "
CULVER CITY TRANSPORTATION DEPARTMENT (CULVER ...
\n",
- "
4
\n",
- "
3623536
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
3
\n",
- "
City of Roseville
\n",
- "
10
\n",
- "
6990000
\n",
- "
Battery Electric Bus
\n",
- "
35 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
4
\n",
- "
City of San Luis Obispo
\n",
- "
1
\n",
- "
689000
\n",
- "
Battery Electric Bus
\n",
- "
35 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
5
\n",
- "
City of Santa Rosa(Santa Rosa CityBus)
\n",
- "
5
\n",
- "
3495000
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
6
\n",
- "
City of Visalia - Visalia City Coach(Visalia T...
\n",
- "
4
\n",
- "
2756000
\n",
- "
Battery Electric Bus
\n",
- "
35 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
7
\n",
- "
Foothill Transit, West Covina, CA
\n",
- "
33
\n",
- "
37642044
\n",
- "
Hydrogen Electic Bus
\n",
- "
40 Foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
8
\n",
- "
GOLDEN EMPIRE TRANSIT (BAKERSFIELD, CA)
\n",
- "
5
\n",
- "
5406355
\n",
- "
Hydrogen Electic Bus
\n",
- "
40 Foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
9
\n",
- "
Golden Empire Transit
\n",
- "
5
\n",
- "
5458305
\n",
- "
Hydrogen Electic Bus
\n",
- "
40 Foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
10
\n",
- "
Lane Transit (Oregon)
\n",
- "
30
\n",
- "
27894999
\n",
- "
battery electric bus
\n",
- "
40 foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
11
\n",
- "
Napa Valley Transportation Authority
\n",
- "
2
\n",
- "
1398000
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
12
\n",
- "
ORANGE COUNTY TRANSPORTATION AUTHORITY (ORANGE...
\n",
- "
10
\n",
- "
9319520
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
13
\n",
- "
SLO TRANSIT (SAN LUIS OBISPO, CA)
\n",
- "
1
\n",
- "
847214
\n",
- "
battery electric bus
\n",
- "
35 foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
14
\n",
- "
SUNLINE TRANSIT AGENCY (THOUSAND PALMS)
\n",
- "
5
\n",
- "
5755155
\n",
- "
Fuel Cell Electric Bus
\n",
- "
40 Foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
15
\n",
- "
SUNLINE TRANSIT AGENCY (THOUSAND PALMS, CA)
\n",
- "
5
\n",
- "
5771865
\n",
- "
Hydrogen Electic Bus
\n",
- "
40 Foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
16
\n",
- "
Sacramento County Airport System
\n",
- "
5
\n",
- "
3495000
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
17
\n",
- "
San Diego Metro
\n",
- "
12
\n",
- "
18759576
\n",
- "
battery electric
\n",
- "
60 foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
18
\n",
- "
Santa Maria Area Transit
\n",
- "
2
\n",
- "
1378000
\n",
- "
Battery Electric Bus
\n",
- "
35 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
19
\n",
- "
Santa Maria Regional Transit
\n",
- "
5
\n",
- "
4552010
\n",
- "
Battery Electric Bus
\n",
- "
35 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
20
\n",
- "
Santa Rosa City Bus
\n",
- "
4
\n",
- "
2796000
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
21
\n",
- "
Sonoma County Transit
\n",
- "
10
\n",
- "
8990000
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
22
\n",
- "
The Bus, City of Merced
\n",
- "
5
\n",
- "
4786285
\n",
- "
battery electric bus
\n",
- "
40 foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
23
\n",
- "
Transit Joint Powers Authority for Merced County
\n",
- "
3
\n",
- "
2077000
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
24
\n",
- "
UC DAVIS (UNITRANS) (DAVIS, CA)
\n",
- "
10
\n",
- "
9321926
\n",
- "
battery electric bus
\n",
- "
40 foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
25
\n",
- "
UCLA FLEET & TRANSIT
\n",
- "
2
\n",
- "
2008826
\n",
- "
battery electric bus
\n",
- "
40 foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
26
\n",
- "
University of California - San Diego
\n",
- "
6
\n",
- "
4134000
\n",
- "
Battery Electric Bus
\n",
- "
35 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
27
\n",
- "
University of California, Irvine
\n",
- "
5
\n",
- "
3995000
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17c
\n",
- "
\n",
- "
\n",
- "
28
\n",
- "
VACAVILLE PUBLIC TRANSPORTATION (CITY COACH) (...
\n",
- "
10
\n",
- "
10175590
\n",
- "
battery electric bus
\n",
- "
35 foot
\n",
- "
17b
\n",
- "
\n",
- "
\n",
- "
29
\n",
- "
VICTOR VALLEY TRANSIT AUTHORITY (VVTA)
\n",
- "
5
\n",
- "
4508160
\n",
- "
Battery Electric Bus
\n",
- "
40 Foot
\n",
- "
17b
\n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " ordering_agency_name quantity total_cost \\\n",
- "0 Alameda County Transit Authority 20 22846640 \n",
- "1 CITY OF PORTERVILLE (PORTERVILLE, CA) 3 2781891 \n",
- "2 CULVER CITY TRANSPORTATION DEPARTMENT (CULVER ... 4 3623536 \n",
- "3 City of Roseville 10 6990000 \n",
- "4 City of San Luis Obispo 1 689000 \n",
- "5 City of Santa Rosa(Santa Rosa CityBus) 5 3495000 \n",
- "6 City of Visalia - Visalia City Coach(Visalia T... 4 2756000 \n",
- "7 Foothill Transit, West Covina, CA 33 37642044 \n",
- "8 GOLDEN EMPIRE TRANSIT (BAKERSFIELD, CA) 5 5406355 \n",
- "9 Golden Empire Transit 5 5458305 \n",
- "10 Lane Transit (Oregon) 30 27894999 \n",
- "11 Napa Valley Transportation Authority 2 1398000 \n",
- "12 ORANGE COUNTY TRANSPORTATION AUTHORITY (ORANGE... 10 9319520 \n",
- "13 SLO TRANSIT (SAN LUIS OBISPO, CA) 1 847214 \n",
- "14 SUNLINE TRANSIT AGENCY (THOUSAND PALMS) 5 5755155 \n",
- "15 SUNLINE TRANSIT AGENCY (THOUSAND PALMS, CA) 5 5771865 \n",
- "16 Sacramento County Airport System 5 3495000 \n",
- "17 San Diego Metro 12 18759576 \n",
- "18 Santa Maria Area Transit 2 1378000 \n",
- "19 Santa Maria Regional Transit 5 4552010 \n",
- "20 Santa Rosa City Bus 4 2796000 \n",
- "21 Sonoma County Transit 10 8990000 \n",
- "22 The Bus, City of Merced 5 4786285 \n",
- "23 Transit Joint Powers Authority for Merced County 3 2077000 \n",
- "24 UC DAVIS (UNITRANS) (DAVIS, CA) 10 9321926 \n",
- "25 UCLA FLEET & TRANSIT 2 2008826 \n",
- "26 University of California - San Diego 6 4134000 \n",
- "27 University of California, Irvine 5 3995000 \n",
- "28 VACAVILLE PUBLIC TRANSPORTATION (CITY COACH) (... 10 10175590 \n",
- "29 VICTOR VALLEY TRANSIT AUTHORITY (VVTA) 5 4508160 \n",
- "\n",
- " prop_type bus_size_type source \n",
- "0 hydrogen electric bus 40 foot 17b \n",
- "1 battery electric bus 35 foot 17b \n",
- "2 Battery Electric Bus 40 Foot 17b \n",
- "3 Battery Electric Bus 35 Foot 17c \n",
- "4 Battery Electric Bus 35 Foot 17c \n",
- "5 Battery Electric Bus 40 Foot 17c \n",
- "6 Battery Electric Bus 35 Foot 17c \n",
- "7 Hydrogen Electic Bus 40 Foot 17b \n",
- "8 Hydrogen Electic Bus 40 Foot 17b \n",
- "9 Hydrogen Electic Bus 40 Foot 17b \n",
- "10 battery electric bus 40 foot 17b \n",
- "11 Battery Electric Bus 40 Foot 17c \n",
- "12 Battery Electric Bus 40 Foot 17b \n",
- "13 battery electric bus 35 foot 17b \n",
- "14 Fuel Cell Electric Bus 40 Foot 17b \n",
- "15 Hydrogen Electic Bus 40 Foot 17b \n",
- "16 Battery Electric Bus 40 Foot 17c \n",
- "17 battery electric 60 foot 17b \n",
- "18 Battery Electric Bus 35 Foot 17c \n",
- "19 Battery Electric Bus 35 Foot 17c \n",
- "20 Battery Electric Bus 40 Foot 17c \n",
- "21 Battery Electric Bus 40 Foot 17c \n",
- "22 battery electric bus 40 foot 17b \n",
- "23 Battery Electric Bus 40 Foot 17c \n",
- "24 battery electric bus 40 foot 17b \n",
- "25 battery electric bus 40 foot 17b \n",
- "26 Battery Electric Bus 35 Foot 17c \n",
- "27 Battery Electric Bus 40 Foot 17c \n",
- "28 battery electric bus 35 foot 17b \n",
- "29 Battery Electric Bus 40 Foot 17b "
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "dgs"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3d7a7889-13e0-4b5a-b77c-6808619c19b1",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.13"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/bus_procurement_cost/fta_data_cleaner.py b/bus_procurement_cost/fta_data_cleaner.py
index c3deda5c9..a519be09c 100644
--- a/bus_procurement_cost/fta_data_cleaner.py
+++ b/bus_procurement_cost/fta_data_cleaner.py
@@ -2,10 +2,7 @@
import pandas as pd
import shared_utils
from calitp_data_analysis.sql import to_snakecase
-from dgs_data_cleaner import new_bus_size_finder, new_prop_finder, project_type_checker
-from tircp_data_cleaner import col_row_updater
-
-gcs_path = "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/"
+from bus_cost_utils import *
def col_splitter(
df: pd.DataFrame,
@@ -26,8 +23,7 @@ def col_splitter(
return df
-
-def agg_just_bus(df: pd.DataFrame) -> pd.DataFrame:
+def fta_agg_bus_only(df: pd.DataFrame) -> pd.DataFrame:
"""
filters FTA data to only show projects with bus procurement (bus count > 0).
then filters projects for new_project_type = bus only
@@ -57,17 +53,16 @@ def agg_just_bus(df: pd.DataFrame) -> pd.DataFrame:
return df2
-
def clean_fta_columns() -> pd.DataFrame:
"""
Main function to clean FTA data. Reads in data, changes datatypes, change specific values.
"""
# params
- file = "data-analyses_bus_procurement_cost_fta_press_release_data_csv.csv"
+ file = "raw_data-analyses_bus_procurement_cost_fta_press_release_data_csv.csv"
# read in data
- df = pd.read_csv(f"{gcs_path}{file}")
+ df = pd.read_csv(f"{GCS_PATH}{file}")
# snakecase df
df = to_snakecase(df)
@@ -83,14 +78,14 @@ def clean_fta_columns() -> pd.DataFrame:
# rename initial propulsion type col to propulsion category
df = df.rename(columns={"propulsion_type": "prosulsion_category"})
- # splittign `approx_#_of_buses col to get bus count
+ # splitting `approx_#_of_buses` col to get bus count
df1 = col_splitter(df, "approx_#_of_buses", "bus_count", "extract_prop_type", "(")
# assign new columns via new_prop_finder and new_bus_size_finder
df2 = df1.assign(
new_prop_type_finder=df1["description"].apply(new_prop_finder),
new_bus_size_type=df1["description"].apply(new_bus_size_finder),
- new_project_type=df1["description"].apply(project_type_checker)
+ new_project_type=df1["description"].apply(project_type_finder)
)
# cleaning specific values
@@ -115,15 +110,14 @@ def clean_fta_columns() -> pd.DataFrame:
return df2
-
if __name__ == "__main__":
# initial df (all projects)
all_projects = clean_fta_columns()
# projects with bus count > 0 only.
- just_bus = agg_just_bus(all_projects)
+ just_bus = fta_agg_bus_only(all_projects)
# export both DFs
- all_projects.to_parquet(f"{gcs_path}fta_all_projects_clean.parquet")
- just_bus.to_parquet(f"{gcs_path}fta_bus_cost_clean.parquet")
\ No newline at end of file
+ all_projects.to_parquet(f"{GCS_PATH}clean_fta_all_projects.parquet")
+ just_bus.to_parquet(f"{GCS_PATH}clean_fta_bus_only.parquet")
\ No newline at end of file
diff --git a/bus_procurement_cost/refactor_bus_cost.ipynb b/bus_procurement_cost/refactor_bus_cost.ipynb
new file mode 100644
index 000000000..196491b0c
--- /dev/null
+++ b/bus_procurement_cost/refactor_bus_cost.ipynb
@@ -0,0 +1,2066 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "e50ef668-5812-4b3a-a257-ef0a424b46e3",
+ "metadata": {},
+ "source": [
+ "# Bus Cost Refactor\n",
+ "## overall imports and data sources"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "c2bb63c6-6457-4433-aa9d-0136b2690464",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "pd.set_option(\"display.max_rows\", 300)\n",
+ "pd.set_option(\"display.max_columns\", 100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "910ee0fa-38ce-44f3-8e18-4cdf740e1fd0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Old script imports\n",
+ "from fta_data_cleaner import *\n",
+ "from dgs_data_cleaner import *\n",
+ "from tircp_data_cleaner import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8a1fd0b4-14a6-4cad-bb15-0ce0437ed125",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# updated Script imports for bus cost utils\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "c3552c45-8b28-4bbe-ae82-f2d726a45937",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#immutable GCS path\n",
+ "# save to bus cost utils\n",
+ "GCS_PATH = \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "e1b1f367-1dac-463f-8790-2e5134b7e79b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.9/site-packages/openpyxl/worksheet/_reader.py:312: UserWarning: Data Validation extension is not supported and will be removed\n",
+ " warn(msg)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# links to all Raw Data\n",
+ "fta_raw = pd.read_csv(f\"{GCS_PATH}raw_data-analyses_bus_procurement_cost_fta_press_release_data_csv.csv\")\n",
+ "tircp_raw = pd.read_excel(f\"{GCS_PATH}raw_TIRCP Tracking Sheets 2_1-10-2024.xlsx\", sheet_name=\"Project Tracking\")\n",
+ "dgs17b_raw = pd.read_excel(f\"{GCS_PATH}raw_17b compiled.xlsx\", sheet_name = \"Usage Report Template\")\n",
+ "dgs17c_raw = pd.read_excel(f\"{GCS_PATH}raw_17c compiled-Proterra Compiled Contract Usage Report .xlsx\", sheet_name = \"Proterra \")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d04911c1-e839-41fe-87b3-5065586f2223",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Functions to Save\n",
+ "for new `bus_cost_utils.py` script"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "29f25932-dddd-409f-a57f-eef29570a178",
+ "metadata": {},
+ "source": [
+ "## save to bus_cost_utils.py\n",
+ "for everything to use from"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "c6dacaba-c6f7-4cb0-afef-a84f77de25fc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# NEW FUNCTION\n",
+ "# moved this to bus_cost_utils\n",
+ "def bus_min_max_summary(data:pd.DataFrame, col1:str, col_list=[\"transit_agency\",\n",
+ " \"total_agg_cost\",\n",
+ " \"total_bus_count\",\n",
+ " \"new_cost_per_bus\"]):\n",
+ " \"\"\"\n",
+ " function to display min/max of specific column in aggregated bus df.\n",
+ " \n",
+ " \"\"\"\n",
+ " \n",
+ " return display(\n",
+ " Markdown(f\"**Max {col1}**\"),\n",
+ " data[data[col1] == data[col1].max()][col_list],\n",
+ " Markdown(f\"**Min {col1}**\"),\n",
+ " data[data[col1] == data[col1].min()][col_list]\n",
+ " )\n",
+ " \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5513b941-ecdb-405e-bfd6-952df6b8f8b4",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# NEW PROP FINDER\n",
+ "def new_prop_finder(description: str) -> str:\n",
+ " \"\"\"\n",
+ " function that matches keywords from each propulsion type list against the item description col, returns a standardized prop type\n",
+ " now includes variable that make description input lowercase.\n",
+ " to be used with .assign()\n",
+ " \"\"\"\n",
+ "\n",
+ " BEB_list = [\n",
+ " \"battery electric\",\n",
+ " \"BEBs paratransit buses\"\n",
+ " ]\n",
+ "\n",
+ " cng_list = [\n",
+ " \"cng\",\n",
+ " \"compressed natural gas\" \n",
+ " ]\n",
+ "\n",
+ " electric_list = [\n",
+ " \"electric buses\",\n",
+ " \"electric commuter\",\n",
+ " \"electric\",\n",
+ " ]\n",
+ "\n",
+ " FCEB_list = [\n",
+ " \"fuel cell\",\n",
+ " \"hydrogen\",\n",
+ " #\"fuel cell electric\",\n",
+ " #\"hydrogen fuel cell\",\n",
+ " #\"fuel cell electric bus\",\n",
+ " #\"hydrogen electric bus\",\n",
+ " ]\n",
+ "\n",
+ " # low emission (hybrid)\n",
+ " hybrid_list = [\n",
+ " #\"diesel electric hybrids\",\n",
+ " #\"diesel-electric hybrids\",\n",
+ " #\"hybrid electric\",\n",
+ " #\"hybrid electric buses\",\n",
+ " #\"hybrid electrics\",\n",
+ " \"hybrids\",\n",
+ " \"hybrid\",\n",
+ " ]\n",
+ "\n",
+ " # low emission (propane)\n",
+ " propane_list = [\n",
+ " #\"propane buses\",\n",
+ " #\"propaned powered vehicles\",\n",
+ " \"propane\",\n",
+ " ]\n",
+ "\n",
+ " mix_beb_list = [\n",
+ " \"2 BEBs and 4 hydrogen fuel cell buses\",\n",
+ " ]\n",
+ "\n",
+ " mix_lowe_list = [\n",
+ " \"diesel and gas\",\n",
+ " ]\n",
+ "\n",
+ " mix_zero_low_list = [\n",
+ " \"15 electic, 16 hybrid\",\n",
+ " \"4 fuel cell / 3 CNG\",\n",
+ " \"estimated-cutaway vans (PM- award will not fund 68 buses\",\n",
+ " \"1:CNGbus ;2 cutaway CNG buses\",\n",
+ " ]\n",
+ "\n",
+ " zero_e_list = [\n",
+ " #\"zero emission buses\",\n",
+ " #\"zero emission electric\",\n",
+ " #\"zero emission vehicles\",\n",
+ " \"zero-emission\",\n",
+ " \"zero emission\",\n",
+ " ]\n",
+ "\n",
+ " item_description = description.lower().replace(\"‐\", \" \").strip()\n",
+ "\n",
+ " if any(word in item_description for word in BEB_list) and not any(\n",
+ " word in item_description for word in [\"diesel\", \"hybrid\", \"fuel cell\"]\n",
+ " ):\n",
+ " return \"BEB\"\n",
+ "\n",
+ " elif any(word in item_description for word in FCEB_list):\n",
+ " return \"FCEB\"\n",
+ "\n",
+ " elif any(word in item_description for word in hybrid_list):\n",
+ " return \"low emission (hybrid)\"\n",
+ "\n",
+ " elif any(word in item_description for word in mix_beb_list):\n",
+ " return \"mix (BEB and FCEB)\"\n",
+ "\n",
+ " elif any(word in item_description for word in mix_lowe_list):\n",
+ " return \"mix (low emission)\"\n",
+ "\n",
+ " elif any(word in item_description for word in mix_zero_low_list):\n",
+ " return \"mix (zero and low emission)\"\n",
+ "\n",
+ " elif any(word in item_description for word in zero_e_list):\n",
+ " return \"zero-emission bus (not specified)\"\n",
+ "\n",
+ " elif any(word in item_description for word in propane_list):\n",
+ " return \"low emission (propane)\"\n",
+ "\n",
+ " elif any(word in item_description for word in electric_list):\n",
+ " return \"electric (not specified)\"\n",
+ " \n",
+ " elif any(word in item_description for word in cng_list):\n",
+ " return \"CNG\"\n",
+ "\n",
+ " else:\n",
+ " return \"not specified\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "8e18cbe6-bde7-4c30-8a8a-aefd8d619821",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def new_bus_size_finder(description: str) -> str:\n",
+ " \"\"\"\n",
+ " Similar to prop_type_find, matches keywords to item description col and return standardized bus size type.\n",
+ " now includes variable that make description input lowercase.\n",
+ " To be used with .assign()\n",
+ " \"\"\"\n",
+ "\n",
+ " articulated_list = [\n",
+ " \"60 foot\",\n",
+ " \"articulated\",\n",
+ " ]\n",
+ "\n",
+ " standard_bus_list = [\n",
+ " \"30 foot\",\n",
+ " \"35 foot\",\n",
+ " \"40 foot\",\n",
+ " \"40ft\",\n",
+ " \"45 foot\",\n",
+ " \"standard\",\n",
+ " ]\n",
+ "\n",
+ " cutaway_list = [\n",
+ " \"cutaway\",\n",
+ " ]\n",
+ "\n",
+ " other_bus_size_list = [\"feeder bus\"]\n",
+ "\n",
+ " otr_bus_list = [\n",
+ " \"coach style\",\n",
+ " \"over the road\",\n",
+ " ]\n",
+ "\n",
+ " item_description = description.lower().replace(\"-\", \" \").strip()\n",
+ "\n",
+ " if any(word in item_description for word in articulated_list):\n",
+ " return \"articulated\"\n",
+ "\n",
+ " elif any(word in item_description for word in standard_bus_list):\n",
+ " return \"standard/conventional (30ft-45ft)\"\n",
+ "\n",
+ " elif any(word in item_description for word in cutaway_list):\n",
+ " return \"cutaway\"\n",
+ "\n",
+ " elif any(word in item_description for word in otr_bus_list):\n",
+ " return \"over-the-road\"\n",
+ "\n",
+ " elif any(word in item_description for word in other_bus_size_list):\n",
+ " return \"other\"\n",
+ "\n",
+ " else:\n",
+ " return \"not specified\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "8004cc2d-957f-4e9a-9ca8-2a6f9aba9ffb",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def project_type_finder(description: str) -> str:\n",
+ " \"\"\"\n",
+ " function to match keywords to project description col to identify projects that only have bus procurement.\n",
+ " used to classify projects into different categories: bus only, bus + others, no bus procurement.\n",
+ " use with .assign() to get a new col.\n",
+ " \"\"\"\n",
+ " bus_list =[\n",
+ " \"bus\",\n",
+ " \"transit vehicles\",# for fta list\n",
+ " \"cutaway vehicles\",# for fta list\n",
+ " \"zero-emission vehicles\", # for tircp list\n",
+ " \"zero emission vehicles\",\n",
+ " \"zero‐emissions vans\",\n",
+ " \"hybrid-electric vehicles\",\n",
+ " \"battery-electric vehicles\",\n",
+ " \"buy new replacement vehicles\", # specific string for fta list\n",
+ " ]\n",
+ " \n",
+ " exclude_list =[\n",
+ " \"facility\",\n",
+ " #\"station\",\n",
+ " \"stops\",\n",
+ " \"installation\",\n",
+ " \"depot\",\n",
+ " \"construct\",\n",
+ " \"infrastructure\",\n",
+ " \"signal priority\",\n",
+ " \"improvements\",\n",
+ " \"build\",\n",
+ " \"chargers\",\n",
+ " \"charging equipment\",\n",
+ " \"install\",\n",
+ " \"rail\",\n",
+ " \"garage\",\n",
+ " \"facilities\",\n",
+ " \"bus washing system\",\n",
+ " \"build a regional transit hub\" # specific string needed for fta list\n",
+ " #\"associated infrastructure\" may need to look at what is associated infrastructure is for ZEB \n",
+ " \n",
+ " ]\n",
+ " proj_description = description.lower().strip()\n",
+ "\n",
+ " if any(word in proj_description for word in bus_list) and not any(\n",
+ " word in proj_description for word in exclude_list\n",
+ " ):\n",
+ " return \"bus only\"\n",
+ " \n",
+ " elif any(word in proj_description for word in exclude_list) and not any(\n",
+ " word in proj_description for word in bus_list\n",
+ " ):\n",
+ " return \"non-bus components\"\n",
+ " \n",
+ " elif any(word in proj_description for word in exclude_list) and any(\n",
+ " word in proj_description for word in bus_list\n",
+ " ):\n",
+ " return \"includes bus and non-bus components\"\n",
+ " \n",
+ " else:\n",
+ " return \"needs review\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "c0ca9254-2983-4cab-845c-f9bfb0229417",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def col_row_updater(df: pd.DataFrame, col1: str, val1, col2: str, new_val):\n",
+ " \"\"\"\n",
+ " function used to update values at specified columns and row value.\n",
+ " \"\"\"\n",
+ " df.loc[df[col1] == val1, col2] = new_val\n",
+ " \n",
+ " return"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "fa387f41-c9b3-455a-9829-cfabb3f98c9b",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Moved to new bus_cost_util\n",
+ "\n",
+ "def outlier_flag(col):\n",
+ " \"\"\"\n",
+ " function to flag outlier rows. use with .apply()\n",
+ " \"\"\"\n",
+ " \n",
+ " return col <= -3 or col >= 3\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d46d8747-5d4e-418d-a362-c80a093de4dd",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## save to analysis notebook\n",
+ "chart functions should stay in the analysis notebook since the charts only exist in the analysis notebook\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "0b2be581-f4e9-4f7e-bde5-01f2de183479",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ModuleNotFoundError",
+ "evalue": "No module named 'cost_per_bus_nb_scripts'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[12], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mseaborn\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01msns\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mshared_utils\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcost_per_bus_nb_scripts\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mIPython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdisplay\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Markdown, display\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mticker\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ScalarFormatter\n",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cost_per_bus_nb_scripts'"
+ ]
+ }
+ ],
+ "source": [
+ "## moved to analysis NB 6/25\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "import shared_utils\n",
+ "from cost_per_bus_nb_scripts import *\n",
+ "from IPython.display import Markdown, display\n",
+ "from matplotlib.ticker import ScalarFormatter\n",
+ "from scipy.stats import zscore\n",
+ "\n",
+ "def new_cpb_aggregate(df: pd.DataFrame, column=\"transit_agency\") -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " function to aggregate compiled data by different categories:\n",
+ " \"transit agency\", \n",
+ " \"propulsion type\", \n",
+ " \"bus_size_type\",\n",
+ " \"new_project_type\"\n",
+ " aggregate on columns:\n",
+ " \"project_title\"\n",
+ " \"ppno\"\n",
+ " \"total_cost\"\n",
+ " \"bus_count\"\n",
+ " \n",
+ " Then, cost per bus is calculated AFTER the aggregation.\n",
+ " \"\"\"\n",
+ " df_agg = (\n",
+ " df.groupby(column)\n",
+ " .agg(\n",
+ " total_project_count=(\"project_title\", \"count\"),\n",
+ " total_project_count_ppno=(\"ppno\", \"count\"),\n",
+ " total_agg_cost=(\"total_cost\", \"sum\"),\n",
+ " total_bus_count=(\"bus_count\", \"sum\"),\n",
+ " #new_prop_type=(\"prop_type\",\"max\")\n",
+ " )\n",
+ " .reset_index()\n",
+ " )\n",
+ " df_agg[\"new_cost_per_bus\"] = (df_agg[\"total_agg_cost\"] / df_agg[\"total_bus_count\"]).astype(\"int64\")\n",
+ " \n",
+ " #calculate zscore\n",
+ " df_agg[\"new_zscore_cost_per_bus\"] = zscore(df_agg[\"new_cost_per_bus\"])\n",
+ " \n",
+ " #flag outliers\n",
+ " df_agg[\"new_is_cpb_outlier?\"] = df_agg[\"new_zscore_cost_per_bus\"].apply(outlier_flag)\n",
+ " \n",
+ " return df_agg\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fb1ae513-a8bf-4eb1-9e7b-71f828ebb9ea",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "## moved to analysis NB 6/25\n",
+ "def make_chart(y_col: str, title: str, data: pd.DataFrame, x_col: str):\n",
+ " \"\"\"\n",
+ " function to create a bar chart of the top 10 rows, sorted by y_col descending.\"\"\"\n",
+ " \n",
+ " data.sort_values(by=y_col, ascending=False).head(10).plot(\n",
+ " x=x_col, y=y_col, kind=\"bar\", color=\"skyblue\"\n",
+ " )\n",
+ " plt.title(title)\n",
+ " plt.xlabel(x_col)\n",
+ " plt.ylabel(y_col)\n",
+ "\n",
+ " plt.ticklabel_format(style=\"plain\", axis=\"y\")\n",
+ " plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4a5bc209-a660-4c18-86b0-574640391a7d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "## moved to analysis NB 6/25\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt\n",
+ "from matplotlib.ticker import ScalarFormatter\n",
+ "\n",
+ "def dist_curve(\n",
+ " df: pd.DataFrame,\n",
+ " mean: str,\n",
+ " std: str,\n",
+ " title: str,\n",
+ " xlabel: str,\n",
+ "):\n",
+ " \"\"\"\n",
+ " function to make distribution curve. uses the \"cost_per_bus\" column of the df.\n",
+ " \"\"\"\n",
+ " sns.histplot(df[\"cost_per_bus\"], kde=True, color=\"skyblue\", bins=20)\n",
+ " # mean line\n",
+ " plt.axvline(\n",
+ " mean, color=\"red\", linestyle=\"dashed\", linewidth=2, label=f\"Mean: ${mean:,.2f}\"\n",
+ " )\n",
+ " # mean+1std\n",
+ " plt.axvline(\n",
+ " mean + std,\n",
+ " color=\"green\",\n",
+ " linestyle=\"dashed\",\n",
+ " linewidth=2,\n",
+ " label=f\"Standard Deviation: ${std:,.2f}\",\n",
+ " )\n",
+ " plt.axvline(mean - std, color=\"green\", linestyle=\"dashed\", linewidth=2)\n",
+ " plt.axvline(mean + (std * 2), color=\"green\", linestyle=\"dashed\", linewidth=2)\n",
+ " plt.axvline(mean + (std * 3), color=\"green\", linestyle=\"dashed\", linewidth=2)\n",
+ "\n",
+ " plt.title(title + \" with Mean and Standard Deviation\")\n",
+ " plt.xlabel(xlabel)\n",
+ " plt.ylabel(\"Frequency\")\n",
+ "\n",
+ " # Turn off scientific notation on x-axis?\n",
+ " plt.gca().xaxis.set_major_formatter(ScalarFormatter(useMathText=False))\n",
+ "\n",
+ " plt.legend()\n",
+ " plt.show()\n",
+ "\n",
+ " return\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "97bdb85b-ecaa-4634-8ea1-02ebc630567f",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Changes to current grant type scripts\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "84e4d8ed-131b-4a03-a2af-55c2bd4efc66",
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "## FTA Script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ea9c3269-d53d-4d94-bc22-c6768cb63d91",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# FTA\n",
+ "# copied over 6/25/2024\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import shared_utils\n",
+ "from calitp_data_analysis.sql import to_snakecase\n",
+ "from bus_cost_utils import *\n",
+ "# from dgs_data_cleaner import new_bus_size_finder, new_prop_finder, project_type_checker\n",
+ "#from tircp_data_cleaner import col_row_updater\n",
+ "\n",
+ "def col_splitter(\n",
+ " df: pd.DataFrame, \n",
+ " col_to_split: str, \n",
+ " new_col1: str, \n",
+ " new_col2: str, \n",
+ " split_char: str\n",
+ ")-> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " function to split a column into 2 columns by specific character.\n",
+ " ex. split 100(beb) to \"100\" & \"(beb)\"\n",
+ " \"\"\"\n",
+ " df[[new_col1, new_col2]] = df[col_to_split].str.split(\n",
+ " pat=split_char, n=1, expand=True\n",
+ " )\n",
+ "\n",
+ " df[new_col2] = df[new_col2].str.replace(\")\", \"\")\n",
+ "\n",
+ " return df\n",
+ "\n",
+ "def fta_agg_bus_only(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " filters FTA data to only show projects with bus procurement (bus count > 0).\n",
+ " then filters projects for new_project_type = bus only\n",
+ " then aggregates\n",
+ " \"\"\"\n",
+ " df1 = df[(df[\"bus_count\"] > 0) & (df[\"new_project_type\"] == \"bus only\")]\n",
+ "\n",
+ " df2 = (\n",
+ " df1.groupby(\n",
+ " [\n",
+ " \"project_sponsor\",\n",
+ " \"project_title\",\n",
+ " \"new_prop_type_finder\",\n",
+ " \"new_bus_size_type\",\n",
+ " \"description\",\n",
+ " \"new_project_type\"\n",
+ " ]\n",
+ " )\n",
+ " .agg(\n",
+ " {\n",
+ " \"funding\": \"sum\",\n",
+ " \"bus_count\": \"sum\",\n",
+ " }\n",
+ " )\n",
+ " .reset_index()\n",
+ " )\n",
+ "\n",
+ " return df2\n",
+ "\n",
+ "def clean_fta_columns() -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Main function to clean FTA data. Reads in data, changes datatypes, change specific values.\n",
+ " \"\"\"\n",
+ " # params\n",
+ " \n",
+ " file = \"raw_data-analyses_bus_procurement_cost_fta_press_release_data_csv.csv\"\n",
+ "\n",
+ " # read in data\n",
+ " df = pd.read_csv(f\"{GCS_PATH}{file}\")\n",
+ "\n",
+ " # snakecase df\n",
+ " df = to_snakecase(df)\n",
+ "\n",
+ " # clean funding values\n",
+ " df[\"funding\"] = (\n",
+ " df[\"funding\"]\n",
+ " .str.replace(\"$\", \"\")\n",
+ " .str.replace(\",\", \"\")\n",
+ " .str.strip()\n",
+ " )\n",
+ "\n",
+ " # rename initial propulsion type col to propulsion category\n",
+ " df = df.rename(columns={\"propulsion_type\": \"prosulsion_category\"})\n",
+ "\n",
+ " # splitting `approx_#_of_buses` col to get bus count\n",
+ " df1 = col_splitter(df, \"approx_#_of_buses\", \"bus_count\", \"extract_prop_type\", \"(\")\n",
+ "\n",
+ " # assign new columns via new_prop_finder and new_bus_size_finder\n",
+ " df2 = df1.assign(\n",
+ " new_prop_type_finder=df1[\"description\"].apply(new_prop_finder),\n",
+ " new_bus_size_type=df1[\"description\"].apply(new_bus_size_finder),\n",
+ " new_project_type=df1[\"description\"].apply(project_type_finder)\n",
+ " )\n",
+ "\n",
+ " # cleaning specific values\n",
+ " col_row_updater(df2, \"funding\", \"7443765\", \"bus_count\", 56)\n",
+ " col_row_updater(df2, \"funding\", \"17532900\", \"bus_count\", 12)\n",
+ " col_row_updater(df2, \"funding\", \"40402548\", \"new_prop_type_finder\", \"CNG\")\n",
+ " col_row_updater(df2, \"funding\", \"30890413\", \"new_prop_type_finder\", \"mix (zero and low emission)\")\n",
+ " col_row_updater(df2, \"funding\", \"29331665\", \"new_prop_type_finder\", \"mix (zero and low emission)\")\n",
+ " col_row_updater(df2, \"funding\", \"7598425\", \"new_prop_type_finder\", \"mix (zero and low emission)\")\n",
+ " col_row_updater(df2, \"funding\", \"7443765\", \"new_prop_type_finder\", \"mix (zero and low emission)\")\n",
+ " col_row_updater(df2, \"funding\", \"3303600\", \"new_prop_type_finder\", \"mix (diesel and gas)\")\n",
+ " col_row_updater(df2, \"funding\", \"2063160\", \"new_prop_type_finder\", \"low emission (hybrid)\")\n",
+ " col_row_updater(df2, \"funding\", \"1760000\", \"new_prop_type_finder\", \"low emission (propane)\")\n",
+ " col_row_updater(df2, \"funding\", \"1006750\", \"new_prop_type_finder\", \"ethanol\")\n",
+ " col_row_updater(df2, \"funding\", \"723171\", \"new_prop_type_finder\", \"low emission (propane)\")\n",
+ " col_row_updater(df2, \"funding\", \"23280546\", \"new_prop_type_finder\", \"BEB\")\n",
+ "\n",
+ " # update data types\n",
+ " update_cols = [\"funding\", \"bus_count\"]\n",
+ "\n",
+ " df2[update_cols] = df2[update_cols].astype(\"int64\")\n",
+ "\n",
+ " return df2\n",
+ "\n",
+ "#if __name__ == \"__main__\":\n",
+ "\n",
+ " # initial df (all projects)\n",
+ "# all_projects = clean_fta_columns()\n",
+ "\n",
+ " # projects with bus count > 0 only.\n",
+ "# just_bus = fta_agg_bus_only(all_projects)\n",
+ "\n",
+ " # export both DFs\n",
+ "# all_projects.to_parquet(f\"{GCS_PATH}clean_fta_all_projects.parquet\")\n",
+ "# just_bus.to_parquet(f\"{GCS_PATH}clean_fta_bus_only.parquet\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0a60d451-7532-4053-b0de-3fc7c5a55792",
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "## TIRCP script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e16119d-f6f3-478b-a419-7c4989557910",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# TIRCP\n",
+ "## copied over 6/25/24\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import shared_utils\n",
+ "from calitp_data_analysis.sql import to_snakecase\n",
+ "from bus_cost_utils import *\n",
+ "\n",
+ "def clean_tircp_columns() -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " main function that reads in and cleans TIRCP data.\n",
+ " \"\"\"\n",
+ " \n",
+ " file_name = \"raw_TIRCP Tracking Sheets 2_1-10-2024.xlsx\"\n",
+ " tircp_name = \"Project Tracking\"\n",
+ "\n",
+ " # read in data\n",
+ " df = pd.read_excel(f\"{GCS_PATH}{file_name}\", sheet_name=tircp_name)\n",
+ "\n",
+ " # keep specific columns\n",
+ " keep_col = [\n",
+ " \"Award Year\",\n",
+ " \"Project #\",\n",
+ " \"Grant Recipient\",\n",
+ " \"Project Title\",\n",
+ " \"PPNO\",\n",
+ " \"District\",\n",
+ " \"County\",\n",
+ " \"Project Description\",\n",
+ " \"bus_count\",\n",
+ " \"Master Agreement Number\",\n",
+ " \"Total Project Cost\",\n",
+ " \"TIRCP Award Amount ($)\",\n",
+ " ]\n",
+ "\n",
+ " df1 = df[keep_col]\n",
+ "\n",
+ " # snakecase\n",
+ " df2 = to_snakecase(df1)\n",
+ "\n",
+ " # dict of replacement values\n",
+ " value_replace_dict = {\n",
+ " \"Antelope Valley Transit Authority \": \"Antelope Valley Transit Authority (AVTA)\",\n",
+ " \"Humboldt Transit Authority\": \"Humboldt Transit Authority (HTA)\",\n",
+ " \"Orange County Transportation Authority\": \"Orange County Transportation Authority (OCTA)\",\n",
+ " \"Capitol Corridor Joint Powers Authority\": \"Capitol Corridor Joint Powers Authority (CCJPA)\",\n",
+ " \"Los Angeles County Metropolitan Transportation Authority\": \"Los Angeles County Metropolitan Transportation Authority (LA Metro)\",\n",
+ " \"Monterey-Salinas Transit\": \"Monterey-Salinas Transit District (MST)\",\n",
+ " \"Sacramento Regional Transit (SacRT)\": \"Sacramento Regional Transit District (SacRT)\",\n",
+ " \"Sacramento Regional Transit District\": \"Sacramento Regional Transit District (SacRT)\",\n",
+ " \"Sacramento Regional Transit District (SacRT) \": \"Sacramento Regional Transit District (SacRT)\",\n",
+ " \"San Diego Association of Governments\": \"San Diego Association of Governments (SANDAG)\",\n",
+ " \"Santa Clara Valley Transportation Authority (SCVTA)\": \"Santa Clara Valley Transportation Authority (VTA)\",\n",
+ " \"Southern California Regional Rail Authority (SCRRA)\": \"Southern California Regional Rail Authority (SCRRA - Metrolink)\",\n",
+ " \"Southern California Regional Rail Authority\": \"Southern California Regional Rail Authority (SCRRA - Metrolink)\",\n",
+ " \"3, 4\": \"VAR\",\n",
+ " }\n",
+ " \n",
+ " # replacing values in agency & county col\n",
+ " df3 = df2.replace(\n",
+ " {\"grant_recipient\": value_replace_dict}\n",
+ " ).replace(\n",
+ " {\"county\": value_replace_dict}\n",
+ " )\n",
+ " \n",
+ " # using update function to update values at specific columns and rows\n",
+ " col_row_updater(df3, 'ppno', 'CP106', 'bus_count', 42)\n",
+ " col_row_updater(df3, 'ppno', 'CP005', 'bus_count', 29)\n",
+ " col_row_updater(df3, 'ppno', 'CP028', 'bus_count', 12)\n",
+ " col_row_updater(df3, 'ppno', 'CP048', 'bus_count', 5)\n",
+ " col_row_updater(df3, 'ppno', 'CP096', 'bus_count', 6)\n",
+ " col_row_updater(df3, 'ppno', 'CP111', 'bus_count', 5)\n",
+ " col_row_updater(df3, 'ppno', 'CP130', 'bus_count', 7)\n",
+ " col_row_updater(df3, 'total_project_cost', 203651000, 'bus_count', 8)\n",
+ " \n",
+ " # columns to change dtype to str\n",
+ " dtype_update = [\n",
+ " 'ppno',\n",
+ " 'district'\n",
+ " ]\n",
+ " \n",
+ " df3[dtype_update] = df3[dtype_update].astype('str')\n",
+ " \n",
+ " # assigning new columns using imported functions.\n",
+ " df4 = df3.assign(\n",
+ " prop_type = df3['project_description'].apply(new_prop_finder),\n",
+ " bus_size_type = df3['project_description'].apply(new_bus_size_finder),\n",
+ " new_project_type = df3['project_description'].apply(project_type_finder)\n",
+ " )\n",
+ "\n",
+ " return df4\n",
+ "\n",
+ "def tircp_agg_bus_only(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " filters df to only include projects with bus procurement and for project type = bus only \n",
+ " does not include engineering, planning or construction only projects.\n",
+ " then, aggregates the df by agency name and ppno. Agencies may have multiple projects that procure different types of buses\n",
+ " \"\"\"\n",
+ " df2 = df[\n",
+ " (df[\"bus_count\"] > 0) & (df[\"new_project_type\"] == \"bus only\")\n",
+ " ]\n",
+ " \n",
+ " df3 = (\n",
+ " df2.groupby(\n",
+ " [\n",
+ " \"grant_recipient\",\n",
+ " \"ppno\",\n",
+ " \"prop_type\",\n",
+ " \"bus_size_type\",\n",
+ " \"project_description\",\n",
+ " \"new_project_type\"\n",
+ " ]\n",
+ " )\n",
+ " .agg({\"total_project_cost\": \"sum\", \"bus_count\": \"sum\"})\n",
+ " .reset_index()\n",
+ " )\n",
+ " return df3\n",
+ "\n",
+ "#if __name__ == \"__main__\":\n",
+ " \n",
+ " \n",
+ " \n",
+ " # initial df\n",
+ "# df1 = clean_tircp_columns()\n",
+ " \n",
+ " # aggregate \n",
+ "# df2 = tircp_agg_bus_only(df1)\n",
+ " \n",
+ " # export both df's as parquets to GCS\n",
+ "# df1.to_parquet(f'{GCS_PATH}clean_tircp_all_project.parquet')\n",
+ "# df2.to_parquet(f'{GCS_PATH}clean_tircp_bus_only_clean.parquet')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0a57f455-8b86-47c4-9cda-2114cac504db",
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "## DGS Script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "359f3b7a-d691-446f-9a14-424c47fc0929",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# DGS\n",
+ "# overwrote 6/25\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import shared_utils\n",
+ "from calitp_data_analysis.sql import to_snakecase\n",
+ "from bus_cost_utils import *\n",
+ "\n",
+ "def calculate_total_cost(row):\n",
+ " \"\"\"\n",
+ " Calculate new column for total cost by checking if total_with_options_per_unit is present or not.\n",
+ " if not, then calculate using contract_unit_price.\n",
+ " to be used with .assign()\n",
+ " \"\"\"\n",
+ " if row[\"total_with_options_per_unit\"] > 0:\n",
+ " return row[\"total_with_options_per_unit\"] * row[\"quantity\"]\n",
+ " else:\n",
+ " return row[\"contract_unit_price\"] * row[\"quantity\"]\n",
+ " \n",
+ "def clean_dgs_columns() -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " reads in 2 dgs sheets, adds source column, merges both DFs, snakecase columns, update dtypes for monetary columns.\n",
+ " merged first because the snakecase function messes with the dtypes for some reason\n",
+ " \"\"\"\n",
+ " \n",
+ " \n",
+ " \n",
+ " # params\n",
+ " file_17c = \"raw_17c compiled-Proterra Compiled Contract Usage Report .xlsx\"\n",
+ " file_17b = \"raw_17b compiled.xlsx\"\n",
+ " sheet_17c = \"Proterra \"\n",
+ " sheet_17b = \"Usage Report Template\"\n",
+ "\n",
+ " # merge columns for dataframes\n",
+ " merge_col = [\n",
+ " \"Agency Billing Code\",\n",
+ " \"Contract Line Item Number (CLIN) (RFP ID)\",\n",
+ " \"Contract Unit Price\",\n",
+ " \"Delivery Date\",\n",
+ " \"Extended Contract Price Paid\",\n",
+ " \"Index Date / Catalog Version\",\n",
+ " \"Item Description\",\n",
+ " \"List Price/MSRP\",\n",
+ " \"Manufacturer (OEM)\",\n",
+ " \"Manufacturer Part Number (OEM #)\",\n",
+ " \"Ordering Agency Name\",\n",
+ " \"Purchase Order Date\",\n",
+ " \"Purchase Order Number\",\n",
+ " \"Purchasing Authority Number (for State departments)\",\n",
+ " \"Quantity in \\nUnit of Measure\\n\",\n",
+ " \"Quantity\",\n",
+ " \"source\",\n",
+ " \"State (S) or Local (L) agency\",\n",
+ " \"Unit of Measure\",\n",
+ " \"UNSPSC Code\\n(Version 10)\",\n",
+ " \"Supplier Contract Usage ID\",\n",
+ " ]\n",
+ "\n",
+ " # columns to change dtype\n",
+ " to_int64 = [\n",
+ " \"contract_unit_price\",\n",
+ " \"extended_contract_price_paid\",\n",
+ " \"total_with_options_per_unit\",\n",
+ " \"grand_total\",\n",
+ " ]\n",
+ " \n",
+ " # read in data\n",
+ " dgs_17c = pd.read_excel(f\"{GCS_PATH}{file_17c}\", sheet_name=sheet_17c)\n",
+ " dgs_17b = pd.read_excel(f\"{GCS_PATH}{file_17b}\", sheet_name=sheet_17b)\n",
+ "\n",
+ " # add new column to identify source\n",
+ " dgs_17c[\"source\"] = \"17c\"\n",
+ " dgs_17b[\"source\"] = \"17b\"\n",
+ "\n",
+ " # merge\n",
+ " dgs_17bc = pd.merge(dgs_17b, dgs_17c, how=\"outer\", on=merge_col).fillna(0)\n",
+ "\n",
+ " # snakecase\n",
+ " dgs_17bc = to_snakecase(dgs_17bc)\n",
+ "\n",
+ " # takes list of columns and updates to int64\n",
+ " dgs_17bc[to_int64] = dgs_17bc[to_int64].astype(\"int64\")\n",
+ "\n",
+ " # change purchase_order_number col to str\n",
+ " dgs_17bc[\"purchase_order_number\"] = dgs_17bc[\"purchase_order_number\"].astype(\"str\")\n",
+ "\n",
+ " # adds 3 new columns from functions\n",
+ " dgs_17bc2 = dgs_17bc.assign(\n",
+ " total_cost=dgs_17bc.apply(calculate_total_cost, axis=1),\n",
+ " new_prop_type=dgs_17bc[\"item_description\"].apply(new_prop_finder),\n",
+ " new_bus_size=dgs_17bc[\"item_description\"].apply(new_bus_size_finder),\n",
+ " )\n",
+ "\n",
+ " return dgs_17bc2\n",
+ "\n",
+ "def dgs_agg_by_agency(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " function that aggregates the DGS data frame by transit agency and purchase order number (PPNO) to get total cost of just buses without options.\n",
+ " first, dataframe is filtered for rows containing buses (does not include rows with 'not specified').\n",
+ " then, group by agency, PPNO, prop type and bus size, and aggregate the quantity and total cost of just buses.\n",
+ " Possible for agencies to have multiple PPNOs for different bus types and sizes.\n",
+ " \"\"\"\n",
+ " # filter for rows containing bus, does not include accessories/warranties/parts/etc.\n",
+ " agg_agency_bus_count = df[~df[\"new_prop_type\"].str.contains(\"not specified\")]\n",
+ "\n",
+ " agg_agency_bus_count2 = agg_agency_bus_count[\n",
+ " [\n",
+ " \"ordering_agency_name\",\n",
+ " \"purchase_order_number\",\n",
+ " \"item_description\",\n",
+ " \"quantity\",\n",
+ " \"source\",\n",
+ " \"total_cost\",\n",
+ " \"new_prop_type\",\n",
+ " \"new_bus_size\",\n",
+ " ]\n",
+ " ]\n",
+ "\n",
+ " agg_agency_bus_count3 = (\n",
+ " agg_agency_bus_count2.groupby(\n",
+ " [\n",
+ " \"ordering_agency_name\",\n",
+ " \"purchase_order_number\",\n",
+ " \"new_prop_type\",\n",
+ " \"new_bus_size\",\n",
+ " ]\n",
+ " )\n",
+ " .agg(\n",
+ " {\n",
+ " \"quantity\": \"sum\",\n",
+ " \"total_cost\": \"sum\",\n",
+ " \"source\": \"max\",\n",
+ " }\n",
+ " )\n",
+ " .reset_index()\n",
+ " )\n",
+ "\n",
+ " return agg_agency_bus_count3\n",
+ "\n",
+ "def dgs_agg_by_agency_w_options(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " similar to the previous function, aggregates the DGS dataframe by transit agency to get total cost of buses with options.\n",
+ " agencies may order buses with different configurations, resulting in different total cost.\n",
+ " function creates 1 df of only buses to retain initial propulsion type, size type and quantity of buses.\n",
+ " then, creates 2nd df of aggregated total cost of buses+options, by transit agency.\n",
+ " lastly, both df's are merged together.\n",
+ " \"\"\"\n",
+ " # filter df for rows NOT containing 'not specified'. only returns rows with buses\n",
+ " dfa = df[~df[\"new_prop_type\"].str.contains(\"not specified\")]\n",
+ "\n",
+ " # keep specific columns\n",
+ " df2 = dfa[\n",
+ " [\n",
+ " \"ordering_agency_name\",\n",
+ " \"purchase_order_number\",\n",
+ " \"quantity\",\n",
+ " \"new_prop_type\",\n",
+ " \"new_bus_size\",\n",
+ " \"source\",\n",
+ " ]\n",
+ " ]\n",
+ "\n",
+ " # aggregate by agency and PPNO, get total cost of buses with options\n",
+ " df3 = (\n",
+ " df.groupby([\"ordering_agency_name\", \"purchase_order_number\"])\n",
+ " .agg({\"total_cost\": \"sum\"})\n",
+ " .reset_index()\n",
+ " )\n",
+ "\n",
+ " # merge both dataframes on agency and PPNO to get bus only rows & total cost with options.\n",
+ " merge = pd.merge(\n",
+ " df2, df3, on=[\"ordering_agency_name\", \"purchase_order_number\"], how=\"left\"\n",
+ " )\n",
+ "\n",
+ " return merge\n",
+ "\n",
+ "#if __name__ == \"__main__\":\n",
+ " \n",
+ "\n",
+ " # initial df\n",
+ "# df1 = clean_dgs_columns()\n",
+ " \n",
+ " #df of just bus cost (no options)\n",
+ "# just_bus = dgs_agg_by_agency(df1)\n",
+ " \n",
+ " #df of bus cost+options\n",
+ "# bus_w_options = dgs_agg_by_agency_w_options(df1)\n",
+ " \n",
+ " #export separate df's as parquet to GCS\n",
+ "# just_bus.to_parquet(f'{GCS_PATH}clean_dgs_all_projects.parquet')\n",
+ "# bus_w_options.to_parquet(f'{GCS_PATH}clean_dgs_bus_only_w_options.parquet')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "24ab982a-afff-4c07-a19a-703ab82d27b1",
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "## cost_per_bus_cleaner / all_bus_cost_cleaner"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "19f4bd75-f614-4937-880a-1e1a6ff2eb7f",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# cost per bus cleaner\n",
+ "# rename to all_bus_cost_cleaner?\n",
+ "# overwrote 6/25/24\n",
+ "\n",
+ "import pandas as pd\n",
+ "from bus_cost_utils import *\n",
+ "from scipy.stats import zscore\n",
+ "\n",
+ "\n",
+ "\n",
+ "def prepare_all_data() ->pd.DataFrame:\n",
+ " \"\"\"\n",
+ " primary function to read-in, merge data across FTA, TIRCP and DGS data.\n",
+ " standardizes columns names, then exports as parquet.\n",
+ " \"\"\"\n",
+ " # variables for file names\n",
+ "\n",
+ "\n",
+ " \n",
+ " # dictionary to update columns names \n",
+ " col_dict = {\n",
+ " \"funding\": \"total_cost\",\n",
+ " \"grant_recipient\": \"transit_agency\",\n",
+ " \"new_bus_size\": \"bus_size_type\",\n",
+ " \"new_bus_size_type\": \"bus_size_type\",\n",
+ " \"new_prop_type\": \"prop_type\",\n",
+ " \"new_prop_type_finder\": \"prop_type\",\n",
+ " \"ordering_agency_name\": \"transit_agency\",\n",
+ " \"purchase_order_number\": \"ppno\",\n",
+ " \"quantity\": \"bus_count\",\n",
+ " \"total_project_cost\": \"total_cost\",\n",
+ " \"project_sponsor\": \"transit_agency\",\n",
+ " }\n",
+ "\n",
+ " # reading in data\n",
+ " # bus only projects for each dataset\n",
+ " fta = pd.read_parquet(f\"{GCS_PATH}clean_fta_bus_only.parquet\")\n",
+ " tircp = pd.read_parquet(f\"{GCS_PATH}clean_tircp_bus_only_clean.parquet\")\n",
+ " dgs = pd.read_parquet(f\"{GCS_PATH}clean_dgs_bus_only_w_options.parquet\")\n",
+ " \n",
+ " # adding new column to identify source\n",
+ " fta[\"source\"] = \"fta\"\n",
+ " tircp[\"source\"] = \"tircp\"\n",
+ " dgs[\"source\"] = \"dgs\"\n",
+ "\n",
+ " # using .rename() with dictionary to update column names\n",
+ " fta2 = fta.rename(columns=col_dict)\n",
+ " tircp2 = tircp.rename(columns=col_dict)\n",
+ " dgs2 = dgs.rename(columns=col_dict)\n",
+ " \n",
+ " # merging fta2 and tircp 2\n",
+ " merge1 = pd.merge(fta2,\n",
+ " tircp2,\n",
+ " on=[\n",
+ " \"transit_agency\",\n",
+ " \"prop_type\",\n",
+ " \"bus_size_type\",\n",
+ " \"total_cost\",\n",
+ " \"bus_count\",\n",
+ " \"source\",\n",
+ " \"new_project_type\"\n",
+ " ],\n",
+ " how=\"outer\",\n",
+ " )\n",
+ " \n",
+ " # merging merge1 and dgs2\n",
+ " merge2 = pd.merge(merge1,\n",
+ " dgs2,\n",
+ " on=[\n",
+ " \"transit_agency\",\n",
+ " \"prop_type\",\n",
+ " \"bus_size_type\",\n",
+ " \"total_cost\",\n",
+ " \"bus_count\",\n",
+ " \"source\",\n",
+ " \"ppno\",\n",
+ " ],\n",
+ " how=\"outer\",\n",
+ " )\n",
+ " #normalizing data with cost per bus\n",
+ " #calculating cost per bus here\n",
+ " merge2[\"cost_per_bus\"] = (merge2[\"total_cost\"] / merge2[\"bus_count\"]).astype(\"int64\")\n",
+ " \n",
+ " #calculating zscore on cost per bus\n",
+ " merge2[\"zscore_cost_per_bus\"] = zscore(merge2[\"cost_per_bus\"])\n",
+ " \n",
+ " #flag any outliers\n",
+ " merge2[\"is_cpb_outlier?\"] = merge2[\"zscore_cost_per_bus\"].apply(outlier_flag)\n",
+ " return merge2\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "#if __name__ == \"__main__\":\n",
+ " \n",
+ " # initial df\n",
+ "# df1 = prepare_all_data()\n",
+ " #remove outliers based on cost per bus zscore\n",
+ "# df2 = df1[df1[\"is_cpb_outlier?\"]==False]\n",
+ " \n",
+ " # export to gcs\n",
+ " #full data, with outliers\n",
+ "# df1.to_parquet(f'{GCS_PATH}cleaned_cpb_analysis_data_merge.parquet')\n",
+ " # no outliers\n",
+ "# df2.to_parquet(f'{GCS_PATH}cleaned_no_outliers_cpb_analysis_data_merge.parquet')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9c163a75-eb4b-4a09-b035-7692f9ea68f5",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# NB Variables rework\n",
+ "time to organize, cut down, consolidate variables"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c8fc1d6c-85b5-4890-84f1-66e33eb9d97c",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Notes Variable Categories\n",
+ "- initial DF stuff (all cleaned merged data)\n",
+ " - all_bus\n",
+ " - all_project_counter function\n",
+ "\n",
+ "lots of total counts\n",
+ "**this can be solved by filtering the same df but its different grant type, or using a table of groupby grant type and count of projects**\n",
+ "\n",
+ " COMPLETE count of all projects\n",
+ "- ~~all_project_count~~\n",
+ "- ~~total_bus_count~~\n",
+ "- ~~total_funding~~\n",
+ "- determined that the all-projects count isn't needed; total bus count and funding are calculated in multiple pivot tables.\n",
+ "\n",
+ " count of all projects for each grant type\n",
+ "- count_all_fta\n",
+ "- count_all_tircp\n",
+ "- count_all_dgs\n",
+ " fix: use all_project_count to create a pivot table with margins of each grant type. margins should also add a grand total col\n",
+ " \n",
+ " []count of bus only projects\n",
+ " - ~~bus_only_project_count~~ (this is in merged data)\n",
+ " \n",
+ " count of bus only projects for each grant type\n",
+ " - ~~bus_only_count_fta~~\n",
+ " - ~~bus_only_count_tircp~~\n",
+ " - ~~bus_only_count_dgs~~\n",
+ " fix: use all_project_count to create a summarized dataframe of each grant type\n",
+ "\n",
+ " \n",
+ " \n",
+ "- ZEB only\n",
+ " ~~- zeb_only_df function~~\n",
+ " switched to filtering the dataframe to get ZEB answers\n",
+ "\n",
+ "- non-ZEB only\n",
+ " ~~- non_zeb_only_df function~~\n",
+ " switched to filtering the dataframe\n",
+ " \n",
+ "- means and standard deviations\n",
+ " - switched to using weighted average for chart calculation\n",
+ " - for charts?\n",
+ "\n",
+ "- other things from the initial analysis to include/re-work?\n",
+ " - \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3a718624-5c9e-463d-8de4-1a6f66c9e4d8",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Draft/Test cells"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b5224cfe-6b3a-4c68-a7b5-58df0ff8f85e",
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "## Testing `new_cpb_aggregate` function against initial `cpb_aggregate` function.\n",
+ "to make sure the core data matches, and expect the new function to provide zscores and outlier flags"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c9ffd5a9-b772-4509-b84c-9a96760b3112",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# making copy of final \n",
+ "# test = final #THIS DOES NOT WORK! this is just assigning a new name to final\n",
+ "test = pd.read_parquet(f'{GCS_PATH}old/cpb_analysis_data_merge.parquet')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f8aa3674-78fe-4ba9-8f5e-697d91ff4011",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# testing the improved cpb agg function\n",
+ "# default groupby column is `transit_agency`\n",
+ "\n",
+ "agg1 = new_cpb_aggregate(test)\n",
+ "\n",
+ "# there are some duplicate agencies in the initial DF, these get aggregated together after using the function\n",
+ "# the resulting DF is shorter\n",
+ "display(\n",
+ " agg1.shape,\n",
+ " agg1.columns\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d2bca15a-c12c-4cf5-a5a9-d591ee73a359",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# confirming the default cpb_agg is working\n",
+ "agg1.sample(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "637032e4-d855-4190-a6f5-ff695f77143f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# agg looks good\n",
+ "# double checked it against the old agg function, CPB numbers match between this new function and old one\n",
+ "display(\n",
+ "\n",
+ " agg1[\"new_is_cpb_outlier?\"].value_counts(),\n",
+ " agg1[\"new_zscore_cost_per_bus\"].min(),\n",
+ " agg1[\"new_zscore_cost_per_bus\"].max(),\n",
+ " agg1[agg1[\"new_is_cpb_outlier?\"] == True]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ce1f4c86-e85d-41d8-83f6-14aadce48d5c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test to aggregate by other columns \n",
+ "agg_prop_type = new_cpb_aggregate(test,\"prop_type\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "59298193-fc78-4ffb-bfc3-326593c19edb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# need to compare to the agg prop data in the report\n",
+ "\n",
+ "from cost_per_bus_nb_scripts import cpb_aggregate, no_outliers\n",
+ "\n",
+ "old_prop_agg = cpb_aggregate(no_outliers, \"prop_type\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "01883fc0-4f6d-4e6a-a88f-97a5914b281b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#EVERYTHING CHECKS OUT!\n",
+ "display(\n",
+ " old_prop_agg.shape,\n",
+ " agg_prop_type.shape,\n",
+ " old_prop_agg.head(),\n",
+ " agg_prop_type.head()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "52204752-3932-4ce6-98ac-de8ad3a1f8e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# double checking the bus size agg new vs old\n",
+ "#EVERYTHING CHECKS OUT!\n",
+ "new_agg_bus_size = new_cpb_aggregate(test, \"bus_size_type\")\n",
+ "old_size_agg = cpb_aggregate(no_outliers, \"bus_size_type\")\n",
+ "display(\n",
+ " old_size_agg.shape,\n",
+ " new_agg_bus_size.shape,\n",
+ " old_size_agg,\n",
+ " new_agg_bus_size\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0391dd4d-23e1-49cb-8123-509954c796e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#EVERYTHING CHECKS OUT!\n",
+ "# move forward with `new_cpb_aggregate` function\n",
+ "new_agg_agency = new_cpb_aggregate(test)\n",
+ "old_agency_agg = cpb_aggregate(no_outliers, \"transit_agency\")\n",
+ "display(\n",
+ " old_agency_agg.shape,\n",
+ " new_agg_agency.shape,\n",
+ " old_agency_agg.head(),\n",
+ " new_agg_agency.head()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3428875a-6a64-41bc-8f9c-81902006d7f0",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Testing variables rework\n",
+ "there are a lot of variables in the initial script. need to cut down the amount of variables or at least make it more efficient. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f21298ee-0efb-4f91-ba63-55fc2645a4d2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# read in all cleaned project data without outliers\n",
+ "# cpb_analysis_data_merge is bus only projects. all DGS rows were Bus only projects anyways\n",
+ "\n",
+ "merged_data = pd.read_parquet(f'{GCS_PATH}cleaned_no_outliers_cpb_analysis_data_merge.parquet')\n",
+ "display(\n",
+ " merged_data.columns,\n",
+ " merged_data.shape,\n",
+ " merged_data.head(),\n",
+ " merged_data[\"zscore_cost_per_bus\"].agg([\"min\",\"max\"])\n",
+ "\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ece95fb7-cbb8-46bd-a5f9-2b68a47a4817",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## moved to final NB\n",
+ "\n",
+ "# aggregating by big categories\n",
+ "agg_agency = new_cpb_aggregate(merged_data)\n",
+ "agg_prop = new_cpb_aggregate(merged_data, column=\"prop_type\")\n",
+ "agg_bus_size = new_cpb_aggregate(merged_data, column=\"bus_size_type\")\n",
+ "agg_source = new_cpb_aggregate(merged_data, column=\"source\")\n",
+ "\n",
+ "#overall agency info\n",
+ "display(\n",
+ " #min max,\n",
+ " agg_agency[\"new_cost_per_bus\"].agg([\"min\",\"max\"]),\n",
+ " agg_agency[\"total_bus_count\"].agg([\"min\",\"max\"]),\n",
+ " agg_agency[\"total_agg_cost\"].agg([\"min\",\"max\"]),\n",
+ " agg_agency[\"new_zscore_cost_per_bus\"].agg([\"min\",\"max\"]),\n",
+ " \n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "03940ccb-d1e6-439d-a930-13dae17537b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display(\n",
+ " merged_data.shape,\n",
+ " agg_agency.shape,\n",
+ " merged_data.head(),\n",
+ " agg_agency.head()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1696d78f-7018-417b-9847-d82edac3acdf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# testing pivot table on `merged_data`\n",
+ "# moved to final NB\n",
+ "#pivot table to get totals for each prop type\n",
+ "pivot_prop_type = pd.pivot_table(\n",
+ " merged_data,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " index = \"prop_type\",\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ").reset_index()\n",
+ "pivot_prop_type[\"cost_per_bus\"] = (pivot_prop_type[\"total_cost\"] / pivot_prop_type[\"bus_count\"]).astype(\"int64\")\n",
+ "\n",
+ "display(\n",
+ " #from new_cpb_agg\n",
+ " agg_prop[[\"prop_type\",\"total_agg_cost\",\"total_bus_count\",\"new_cost_per_bus\"]],\n",
+ " #pivot\n",
+ " pivot_prop_type\n",
+ ")\n",
+ "# same data, dont need the pivot table anymore, but the pivot table does have grand total"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ca709e43-4947-4a34-970f-216d4b6ab7cc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#moved to final NB 6/25\n",
+ "\n",
+ "#pivot table to get grand total for zeb/non-zeb only data\n",
+ "\n",
+ "# keep this\n",
+ "zeb_list =[\n",
+ " \"BEB\",\n",
+ " \"FCEB\",\n",
+ " \"electric (not specified)\",\n",
+ " \"zero-emission bus (not specified)\",\n",
+ "]\n",
+ "\n",
+ "zeb_projects = merged_data[merged_data[\"prop_type\"].isin(zeb_list)]\n",
+ "\n",
+ "#keep this\n",
+ "non_zeb_list =[\n",
+ " \"CNG\",\n",
+ " \"ethanol\",\n",
+ " \"low emission (hybrid)\",\n",
+ " \"low emission (propane)\",\n",
+ " \"mix (zero and low emission)\",\n",
+ "]\n",
+ "\n",
+ "non_zeb_projects = merged_data[merged_data[\"prop_type\"].isin(non_zeb_list)]\n",
+ "\n",
+ "#keep this\n",
+ "pivot_zeb_prop = pd.pivot_table(\n",
+ " #filted incoming DF for zeb prop types\n",
+ " zeb_projects,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " index = \"prop_type\",\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ").reset_index() \n",
+ "\n",
+ "pivot_zeb_prop[\"cost_per_bus\"] = (pivot_zeb_prop[\"total_cost\"] / pivot_zeb_prop[\"bus_count\"]).astype(\"int64\")\n",
+ "\n",
+ "#keep this\n",
+ "pivot_non_zeb_prop = pd.pivot_table(\n",
+ " #filted incoming DF for non-zeb prop types\n",
+ " non_zeb_projects,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " index = \"prop_type\",\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ").reset_index()\n",
+ "\n",
+ "pivot_non_zeb_prop[\"cost_per_bus\"] = (pivot_non_zeb_prop[\"total_cost\"] / pivot_non_zeb_prop[\"bus_count\"]).astype(\"int64\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "01647d83-8b4f-47a9-ab57-a1db7cd501dd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display(\n",
+ " #zeb data 3 different methods\n",
+ " #1. filtering agg_prop by zeb list, no grand totas\n",
+ " #2. filtering pivot talbe by zeb list, without grand totals\n",
+ " #3. dedicated pivot table for zeb, with grand totals\n",
+ " #agg_prop[agg_prop[\"prop_type\"].isin(zeb_list)],\n",
+ " #pivot_prop_type.loc[zeb_list],\n",
+ " pivot_zeb_prop,\n",
+ " \n",
+ " #non-zeb same 3 methods\n",
+ " #agg_prop[agg_prop[\"prop_type\"].isin(non_zeb_list)],\n",
+ " #pivot_prop_type.loc[non_zeb_list],\n",
+ " pivot_non_zeb_prop\n",
+ ")\n",
+ "# confirmed all data is the same, but need pivot for grand total rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3be3ef4f-0bf3-4770-a8b7-340d372ae1ce",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# answers total buses sizes\n",
+ "pivot_size = pd.pivot_table(\n",
+ " merged_data,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " index = \"bus_size_type\",\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ").reset_index()\n",
+ "\n",
+ "pivot_size[\"cost_per_bus\"] = (pivot_size[\"total_cost\"] / pivot_size[\"bus_count\"]).astype(\"int64\")\n",
+ "\n",
+ "display(\n",
+ " agg_bus_size[[\"bus_size_type\",\"total_agg_cost\",\"total_bus_count\",\"new_cost_per_bus\"]],\n",
+ " pivot_size,\n",
+ " pivot_prop_type[pivot_prop_type[\"prop_type\"] == \"Grand Total\"]\n",
+ ")\n",
+ "\n",
+ "#same data, dont need pivot for this one because the grand totals will be the same as pivot_prop_type. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c933257-bdc2-4007-9571-58475118073c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# moved to final NB 6/25\n",
+ "\n",
+ "# answers total buses and cost per grant type\n",
+ "pivot_source = pd.pivot_table(\n",
+ " merged_data,\n",
+ " values = [\"bus_count\", \"total_cost\"],\n",
+ " index = \"source\",\n",
+ " aggfunc = \"sum\",\n",
+ " margins = True,\n",
+ " margins_name = \"Grand Total\"\n",
+ ").reset_index()\n",
+ "\n",
+ "pivot_source[\"cost_per_bus\"] = (pivot_source[\"total_cost\"] / pivot_source[\"bus_count\"]).astype(\"int64\")\n",
+ "\n",
+ "display(\n",
+ " agg_source[[\"source\",\"total_agg_cost\",\"total_bus_count\",\"new_cost_per_bus\"]],\n",
+ " pivot_source\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "11547020-dd35-4745-98f8-bbd02fccaa23",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Testing Charts\n",
+ "\n",
+ "using `merged_data`, now without outliers.\n",
+ "charts looking good, similar results to initial charts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aace38a4-3f2d-460d-a258-59efa659f852",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "merged_data.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4753f3ea-00b6-4d5e-a3f0-73b3d3593acb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# means and standard deviations\n",
+ "# for graphs\n",
+ "cpb_mean = merged_data[\"cost_per_bus\"].mean()\n",
+ "cpb_std = merged_data[\"cost_per_bus\"].std()\n",
+ "\n",
+ "#testing weighted average calculation for sub-set non-zeb and zeb\n",
+ "zeb_cpb_wt_avg = (merged_data[merged_data[\"prop_type\"].isin(zeb_list)][\"total_cost\"].sum() / merged_data[merged_data[\"prop_type\"].isin(zeb_list)][\"bus_count\"].sum())\n",
+ "non_zeb_cpb_wt_avg = (merged_data[merged_data[\"prop_type\"].isin(non_zeb_list)][\"total_cost\"].sum() / merged_data[merged_data[\"prop_type\"].isin(non_zeb_list)][\"bus_count\"].sum())\n",
+ "display(\n",
+ " cpb_mean,\n",
+ " cpb_std,\n",
+ " zeb_cpb_wt_avg,\n",
+ " non_zeb_cpb_wt_avg\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2007be9d-13ec-4d0d-a642-d9a42448b924",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# why is the average different when i use .mean() vs. total cost / bus cout\n",
+ "\n",
+ "display(\n",
+ " #this is the arithmatic mean, sums all the `cost_per_bus` rows, the divide by the number of rows. (row-wise)\n",
+ " zeb_projects[\"cost_per_bus\"].mean(),\n",
+ " \n",
+ " #this is like the accounting method of calculating average (Total Cost and Total Quantity Approach (Weighted Average))\n",
+ " pivot_zeb_prop,\n",
+ " \n",
+ " # calculating mean by weighted average the long way (total cost / total bus count, similar to pivot table)\n",
+ " (zeb_projects[\"total_cost\"].sum() / zeb_projects[\"bus_count\"].sum())\n",
+ ")\n",
+ "\n",
+ "# so the calculated grand total cost_per_bus is equivilent to the weighted average cost per bus\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8645cf77-b30a-4c45-b943-ac81e8b5a613",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# chart of all cost per bus in the analysis.\n",
+ "# moved to final NB 6/26\n",
+ "dist_curve(\n",
+ " df=merged_data,\n",
+ " mean=cpb_mean,\n",
+ " std=cpb_std,\n",
+ " title=\"all buses, cost per bus distribution\",\n",
+ " xlabel=\"cost per bus, $ million(s)\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cefa6800-df50-4eda-95f8-74363ef942d0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ZEB cost per bus \n",
+ "# moved to final NB 6/26\n",
+ "dist_curve(\n",
+ " df=zeb_projects,\n",
+ " #using the accounting, weighted average approach to mean (total cost/total number of buses)\n",
+ " mean=zeb_cpb_wt_avg,\n",
+ " # need to investigate if std needs to be weighted as well?\n",
+ " std=zeb_projects[\"cost_per_bus\"].std(),\n",
+ " title=\"ZEB buses, cost per bus distribution\",\n",
+ " xlabel=\"cost per bus, $ million(s)\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "563304d2-2d98-44e6-b3a4-fd54f63fc0d8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# non-zeb cost per bus\n",
+ "# moved to final NB 6/26\n",
+ "dist_curve(\n",
+ " df=non_zeb_projects,\n",
+ " mean=non_zeb_cpb_wt_avg,\n",
+ " std=non_zeb_projects[\"cost_per_bus\"].std(),\n",
+ " title=\"non-ZEB costper bus Distribution\",\n",
+ " xlabel='\"cost per bus, $ million(s)\"',\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aa916127-57d9-4c1c-b5eb-8b7b7e4ac672",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "agg_bus_size"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5f11c857-ddbe-4871-aeca-e27fa00fbde8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# multiple bar charts in one cell\n",
+ "# moved to final NB 6/26\n",
+ "\n",
+ "# cpb by prop type\n",
+ "make_chart(\"new_cost_per_bus\", \"Cost per bus by propulsion type\", x_col=\"prop_type\", data=agg_prop)\n",
+ "\n",
+ "# bus count by prop type\n",
+ "make_chart(\"total_bus_count\", \"Bus count by propulsion type\", x_col=\"prop_type\", data=agg_prop)\n",
+ "\n",
+ "#bus size bar chart\n",
+ "make_chart(\"total_bus_count\", \"\"\"Amount of buses procured by bus size.\n",
+ "excluding 'not specified' responses.\"\"\", x_col=\"bus_size_type\",data=agg_bus_size[agg_bus_size[\"bus_size_type\"]!=\"not specified\"])\n",
+ "\n",
+ "# pivot table to\n",
+ "agg_prop[[\"prop_type\",\"new_cost_per_bus\",\"total_bus_count\"]].sort_values(by=\"new_cost_per_bus\", ascending=False)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9270ab8f-25ff-4de3-aca5-7ef4637a4f9c",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Testing summary and conclusion\n",
+ "time to rework the summary section.\n",
+ "\n",
+ "no more long expositions and variables. try to get the same point across using tables instead."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2472461d-7663-4b66-9bde-4c2a199707a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# moved to final NB 6/25\n",
+ "\n",
+ "new_summary = f\"\"\"\n",
+ "\n",
+ "# Bus Procurement Cost Analysis\n",
+ "\n",
+ "## Summary\n",
+ "This analysis examines the cost of buses for transit agencies across the county. Specifically, to observe the variation of bus cost for propulsion type and bus sizes.\n",
+ "\n",
+ "Data was compiled from three data sources:\n",
+ "1. FTA Bus and Low- and No-Emission Grant Awards press release (federally funded, nationwide data)\n",
+ "2. TIRCP project data (state-funded, California only)\n",
+ "3. DGS usage report for all procurements from California agencies purchasing from New Flyer and Portera Inc.. \n",
+ "\n",
+ "The initial dataset contained close to 300 projects, but was paired down due to projects including components other than buses. Examples include: projects that constructed new facilities, trainings or other non-bus related items like trains and sea farries were excluded.\n",
+ "The resulting dataset only contained projects that were solely used to procure buses. \n",
+ "\n",
+ "{len(merged_data)} projects were determined to contain solely bus purchases. \n",
+ "These projects were aggregated against propulsion type and bus size type, and categorized by ZEB and non-ZEB.\n",
+ "\n",
+ "\n",
+ "Breakdown of each data souce:\n",
+ "{pivot_source.to_markdown(index=False)}\n",
+ "\n",
+ "\n",
+ "**ZEB buses include:**\n",
+ "- zero-emission (not specified) \n",
+ "- electric (not specified)\n",
+ "- battery electric \n",
+ "- fuel cell electric\n",
+ "\n",
+ "**Non-ZEB buses include:**\n",
+ "- CNG \n",
+ "- ethanol \n",
+ "- ow emission (hybrid, propane) \n",
+ "- diesel \n",
+ "- gas\n",
+ "\n",
+ "Below are charts and tables that summarize the findings.\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\"\"\"\n",
+ "from IPython.display import Markdown, display\n",
+ "\n",
+ "display(Markdown(new_summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7472ba04-7def-46ef-814b-bf63c1016f3b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# moved to final NB 6/25\n",
+ "display(\n",
+ " Markdown(\"**ZEB Summary**\"),\n",
+ " pivot_zeb_prop,\n",
+ " \n",
+ " Markdown(\"**Non-ZEB Summary**\"),\n",
+ " pivot_non_zeb_prop,\n",
+ " \n",
+ " Markdown(\"The remaining buses did not specify a propulsion type\")\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "91d0361d-b165-4607-b22e-66ae4234863d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#min max values for all projects\n",
+ "bus_min_max_summary(data=agg_agency,col1=\"new_cost_per_bus\")\n",
+ "bus_min_max_summary(data=agg_agency,col1=\"total_bus_count\")\n",
+ "bus_min_max_summary(data=agg_agency,col1=\"total_agg_cost\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6896d09d-a8e8-4351-bf69-6538d031bf93",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# moved to final NB 6/25\n",
+ "## min max values of just ZEB projects\n",
+ "# YES I CAN!!\n",
+ "new_cols =[\n",
+ " \"transit_agency\",\n",
+ " \"prop_type\",\n",
+ " \"total_cost\",\n",
+ " \"bus_count\",\n",
+ " \"cost_per_bus\"]\n",
+ "\n",
+ "display(Markdown(\"**Which Agneices had the highest and lowest cost per bus?**\")),\n",
+ "bus_min_max_summary(data=zeb_projects, col1=\"cost_per_bus\", col_list=new_cols)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "743b25a2-8693-44f7-98fe-384e910620a7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# moved to final NB 6/25\n",
+ "display(Markdown(\n",
+ " \"**Which agency procured the most and least amount of ZEBs?**\"\n",
+ "))\n",
+ "bus_min_max_summary(data=zeb_projects, col1=\"bus_count\", col_list=new_cols)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "45a95018-0ac8-450d-97d2-aa394e94779a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# moved to final NB 6/25\n",
+ "display(Markdown(\n",
+ " \"**Which Agency had the most and least total ZEB cost?**\"\n",
+ "))\n",
+ "bus_min_max_summary(data=zeb_projects, col1=\"total_cost\", col_list=new_cols)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ee2f5572-0683-4909-8e2a-deea22c006fb",
+ "metadata": {},
+ "source": [
+    "## conclusion"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e39c89a1-a726-44f9-808b-bcf936c77254",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# moved to final NB 6/25\n",
+ "conclusion = f\"\"\"\n",
+ "**Conclusion**\n",
+ "\n",
+ "Based on these findings, The average cost of a ZEB, throughout the US, is ~$1,000,000, roughly twice the price of a conventional, non-ZEB.\n",
+ "The variance in cost depends mainly on the options the Trasnit\n",
+ "Agencies chooses. Highly optioned/customized buses contribute to high cost.\n",
+ "Unfortunately, analyzing the cost of configuable options is outside the scope of data provided. \n",
+ "\"\"\"\n",
+ "display(\n",
+ " Markdown(conclusion)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "414b0e56-4b8f-41ae-9e07-9223abf95c45",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/bus_procurement_cost/refactor_notes.md b/bus_procurement_cost/refactor_notes.md
new file mode 100644
index 000000000..51ddd7e4a
--- /dev/null
+++ b/bus_procurement_cost/refactor_notes.md
@@ -0,0 +1,52 @@
+# Refactor Notes
+* This is a soft skill I want you to hone: what is the simplest way you can explain the analysis, and then back that statement up with code.
+* There are a lot of "variables"... aka code to generate the values you need in the summary text. The balance is heavily tipped that way. Basically 200 lines of code are dedicated to creating variables. There are 6 datasets that you read in, sometimes multiple times.
+ * Do you read each DGS dataset in twice because one is pre-cleaning, and one is post-cleaning? Same for FTA and TIRCP?
+ * What is the 7th dataset that is the merged one?
+* Challenge: for your Markdown paragraphs, can you write just 1 line of code for every variable you need? You can import your dfs once at the top.
+ * Refer to the Google Doc to see the desired table you want to create
+
+## Outline
+**Question:** how much do transit agencies pay to procure buses?
+**Ideal Table:** transit agency / grant recipients with the bus types (size, propulsion), unit bus cost, and number of buses purchased.
+
+Clear code can be read like a story. Each sentence can be developed with functions. The story of your analysis is roughly this:
+
+1. I have 3 datasets, one for each grant, FTA, TIRCP, DGS.
+ * Do these 3 datasets share columns? Can I put them all into 1 final table?
+    * What column do I need to add to distinguish between them?
+ * When I finish step 1, do I need to have 3 cleaned dfs or can I have just one?
+ * If I need 3 cleaned dfs, can I use a naming pattern to easily grab them later? (`fta_cleaned, tircp_cleaned, dgs_cleaned`)
+2. I have a long bus description of text, and I need to grab this information to populate my columns: bus size, bus propulsion, unit bus cost, number purchased.
+ * clean up description, remove extraneous spaces, make it all lower case
+ * I need a function (or two) for tagging all the bus propulsion types
+ * I need a function (or two) for tagging all the bus size types
+ * ... maybe for unit bus cost or number purchased?
+ * I used the bus propulsion function and tagged out the 5 types
+ * I used the bus size function and tagged out the 3 types.
+3. I have to populate some numeric columns
+4. I have to clean up outliers after finding z-scores.
+ * Do you want to drop them? Do you want to keep them and add a column called `outlier`? How have you been using it in your summary statement?
+5. Other cleaning, maybe agency names, etc etc
+6. Save out my cleaned df, and I will use this for all my charts, captions, paragraphs, etc.
+
+---
+
+## 6/24/24: Response to refactor notes and code review doc
+
+Came back to this project with fresh eyes after a couple of months and started to see where a lot of improvements could be made. Got a lot more comfortable describing what I wanted to do and coming up with code/functions in a more readable way.
+
+It was easier to identify that I had a lot of circular dependencies amongst my cleaner scripts, which referenced each other's functions. I moved all the common functions to a new `bus_cost_utils` file to serve as an importable module for the other scripts.
+
+Next, I realized that I had a lot of variables for my f-string in the final `cost_per_bus_analysis` notebook. These variables resulted in a lot of redundant steps, like reading in the same data multiple times, and sometimes in multiple variables producing the same result, which made the file very difficult to navigate.
+
+Taking a step back, I realized that a simpler, "stripped down" notebook would be easier to understand. Settled on replacing the variables with tables or pivot tables and focusing on ZEB-related metrics. This approach helped cut down the number of variables needed and consolidate the information, since tables/pivot tables can help answer multiple questions at the same time.
+
+## Overall steps taken this round of refactor
+* created `bus_cost_utils` module to move all the common functions and variables.
+* adjusted cleaner scripts to reference the module.
+* gave cleaned datasets consistent naming convention (raw, cleaned, bus only) and identical column names to merge on.
+* final, merged dataset contains columns for z-score and an outlier flag. One set was saved with outliers, another was saved without outliers.
+* used the merged dataset without outliers for the final analysis notebook to create all pivot tables, charts and variables.
+* deleted old, initial exploratory notebooks.
+* reorganized GCS folder by moving old initial exports to an `/old` folder.
\ No newline at end of file
diff --git a/bus_procurement_cost/tircp_bus_analysis.ipynb b/bus_procurement_cost/tircp_bus_analysis.ipynb
deleted file mode 100644
index 3798e4c64..000000000
--- a/bus_procurement_cost/tircp_bus_analysis.ipynb
+++ /dev/null
@@ -1,2195 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "7e2bb160-f2fa-49a0-9cbf-b0796599f1d1",
- "metadata": {},
- "outputs": [],
- "source": [
- "import matplotlib.pyplot as plt\n",
- "import pandas as pd\n",
- "import shared_utils\n",
- "from dgs_data_cleaner import project_type_checker\n",
- "\n",
- "# set_option to increase max rows displayed to 200, to see entire df in 1 go/\n",
- "pd.set_option(\"display.max_rows\", 200)\n",
- "pd.set_option('display.max_colwidth', None)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "9f3cd0cb-98d8-43a4-be8d-b7b41f80dd75",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "## AGREEMENT ALLOCATIONS SHEET DATA"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "52794293-2f66-47a7-b580-6170fc72ca94",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "### Agreement Allocations - Read in Raw data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "70250817-0eee-4ab0-95b4-b43050b46f8c",
- "metadata": {},
- "outputs": [],
- "source": [
- "url = \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/TIRCP Tracking Sheets 2_1-10-2024.xlsx\"\n",
- "sheet_name = \"Agreement Allocations\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f80fccec-2313-4a82-9529-5943ba26e401",
- "metadata": {},
- "outputs": [],
- "source": [
- "tircp = pd.read_excel(url, sheet_name)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "79002d63-c2a1-4218-81a2-31e6a10d981e",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "### Agreement Allocations -Data Cleaning and QC"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "78289f7c-54f3-48af-8881-101f50853694",
- "metadata": {},
- "outputs": [],
- "source": [
- "# reducing initialdf to first 11 columns.\n",
- "tircp = tircp.iloc[:, :12]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6526421b-36f3-4326-bfce-4679ffcf2b52",
- "metadata": {},
- "outputs": [],
- "source": [
- "# dictionary for column name update\n",
- "new_col = [\n",
- " \"award_year\",\n",
- " \"project_#\",\n",
- " \"grant_recipient\",\n",
- " \"implementing_agency\",\n",
- " \"ppno\",\n",
- " \"project_id\",\n",
- " \"ea\",\n",
- " \"components\",\n",
- " \"#_of_buses\",\n",
- " \"phase\",\n",
- " \"allocation_amount\",\n",
- " \"expended_amount\",\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c973346f-9232-4d21-8145-6fbe331094de",
- "metadata": {},
- "outputs": [],
- "source": [
- "tircp.columns = new_col\n",
- "tircp.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "14061d68-8a66-42de-baad-c45e0b4664f5",
- "metadata": {},
- "outputs": [],
- "source": [
- "tircp = tircp.drop(\"expended_amount\", axis=1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4ad96350-0b11-4733-aa04-d1053681b24b",
- "metadata": {},
- "outputs": [],
- "source": [
- "# fill NaN with zero?\n",
- "# see if you can sum the bus column\n",
- "tircp.agg({\"#_of_buses\": \"sum\"})\n",
- "# nope this is correct"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f2e30eb4-6adf-48b8-827b-548d806b101a",
- "metadata": {},
- "outputs": [],
- "source": [
- "display(tircp.shape, list(tircp.columns), tircp.head())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9eb613a5-7d1f-489c-8d9f-1226f25a61bf",
- "metadata": {},
- "outputs": [],
- "source": [
- "tircp.grant_recipient.nunique()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "33b665bf-20a0-416f-8b6b-fe835670560d",
- "metadata": {},
- "outputs": [],
- "source": [
- "# use strip to help combine names\n",
- "tircp[\"grant_recipient\"] = tircp[\"grant_recipient\"].str.strip()\n",
- "\n",
- "tircp.grant_recipient.nunique()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ad999ca5-3dda-4bed-a161-7835b7317e3b",
- "metadata": {},
- "outputs": [],
- "source": [
- "# see list of unique names\n",
- "# may be able to consolidate a few\n",
- "tircp.grant_recipient.sort_values().unique()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "57c1b6ab-64ec-4af4-958f-3fd69a9fcf94",
- "metadata": {},
- "outputs": [],
- "source": [
- "new_dict = {\n",
- " \"Antelope Valley Transit Authority\": \"Antelope Valley Transit Authority (AVTA)\",\n",
- " \"Bay Area Rapid Transit District\": \"Bay Area Rapid Transit (BART)\",\n",
- " \"Capitol Corridor Joint Powers Authority\": \"Capitol Corridor Joint Powers Authority (CCJPA)\",\n",
- " \"Los Angeles County Metropolitan Transportation (LA Metro)\": \"Los Angeles County Metropolitan Transportation Authority (LA Metro)\",\n",
- " \"Los Angeles County Metropolitan Transportation Authority\": \"Los Angeles County Metropolitan Transportation Authority (LA Metro)\",\n",
- " \"Sacramento Regional Transit (SacRT)\": \"Sacramento Regional Transit District (SacRT)\",\n",
- " \"Sacramento Regional Transit District\": \"Sacramento Regional Transit District (SacRT)\",\n",
- " \"San Diego Metropolitan Transit System (SDMTS)\": \"San Diego Metropolitan Transit System (MTS)\",\n",
- " \"San Francisco Bay Area Water Emergency Transportation Authority\": \"San Francisco Bay Area Water Emergency Transportation Authority (WETA)\",\n",
- " \"San Francisco Municipal Transportation Agency\": \"San Francisco Municipal Transportation Authority (SFMTA)\",\n",
- " \"Santa Barbara County Association of Governments\\n(SBCAG)\": \"Santa Barbara County Association of Governments (SBCAG)\",\n",
- " \"Santa Clara Valley Transportation Authority\": \"Santa Clara Valley Transportation Authority (VTA)\",\n",
- " \"Transportation Agency for Monterey County\": \"Transportation Agency for Monterey County (TAMC)\",\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "bf769e25-97fe-41eb-9ff5-c1bbda4200ad",
- "metadata": {},
- "outputs": [],
- "source": [
- "# replace the values in grant_recipient using dict\n",
- "# df.replace({'bus_desc': new_dict}, inplace=True)\n",
- "tircp = tircp.replace({\"grant_recipient\": new_dict})"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "97e5ec8e-464a-451f-add3-5965a38d2e3a",
- "metadata": {},
- "outputs": [],
- "source": [
- "# see that some rows were consolidated\n",
- "display(tircp.grant_recipient.nunique())"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "3a197b7b-29a6-46b8-adf5-e31b60c694e0",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "### Agreement Allocations-Export Cleaned data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "5150ed0b-2fcd-4825-b12c-1fdb6bef7e64",
- "metadata": {},
- "outputs": [],
- "source": [
- "tircp.to_csv(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_allocations_clean.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "17549ca7-4ac2-4067-95ae-79e5547500f3",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "### Agreement Allocations-Read in Cleaned data from GCS"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "7e27a6f9-c57f-4e13-ae94-6d13b2ef87ba",
- "metadata": {},
- "outputs": [],
- "source": [
- "tircp = pd.read_csv(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_allocations_clean.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6b29bc28-2f4e-4d6f-8ffc-2df929f7e27f",
- "metadata": {},
- "outputs": [],
- "source": [
- "display(tircp.shape, tircp.columns, tircp.head())"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "eab85396-0dfd-43ef-b938-310745c9d518",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Agreement Allocations-Cost per Bus, per agency"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "024b16e0-45c3-4497-b9b9-6470b3981479",
- "metadata": {},
- "outputs": [],
- "source": [
- "# filer to project with bus count values\n",
- "# caveat: some rows in \"component\" column state some variation of \"purchased buses\", but did not specify the amount of buses.\n",
- "# only rows stating the specificy number of buses purchased are included\n",
- "only_bus = tircp[tircp[\"#_of_buses\"] > 0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "02fab0b3-0c75-4220-8129-3402fb9b3007",
- "metadata": {},
- "outputs": [],
- "source": [
- "display(only_bus.shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ca5e0397-b297-4236-a9c6-43d61d1021aa",
- "metadata": {},
- "outputs": [],
- "source": [
- "# aggregate # of buses and allocation by transit agency\n",
- "bus_cost = (\n",
- " only_bus.groupby(\"grant_recipient\")\n",
- " .agg({\"#_of_buses\": \"sum\", \"allocation_amount\": \"sum\"})\n",
- " .reset_index()\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ddd7f056-4e8e-43be-9391-6b6c0574fc58",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_cost"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a699f202-6fdc-4261-8ed5-d0fc328aa7a1",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_cost[\"cost_per_bus\"] = (\n",
- " (bus_cost[\"allocation_amount\"]) / (bus_cost[\"#_of_buses\"])\n",
- ").astype(\"int64\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "29bff029-a90d-4815-aa21-7405051d3063",
- "metadata": {},
- "outputs": [],
- "source": [
- "display(bus_cost.dtypes, bus_cost)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d988e8f0-f655-4477-a1e0-a56f61df9e8b",
- "metadata": {},
- "outputs": [],
- "source": [
- "# exporting cost per bus\n",
- "bus_cost.to_csv(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_allocation_cost_per_bus.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1c2e30d8-2d49-4dcb-9c75-1a8b264bb011",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "### Agreement Allocations - Stat analysis"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "02b1eb6a-559d-41a4-aa7f-982399dfe7f5",
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4bda679f-ea66-4651-a389-80da221864d3",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_cost"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3f7b6bd6-f0e1-476b-ac36-b695f88536d6",
- "metadata": {},
- "outputs": [],
- "source": [
- "plt.figure(\n",
- "plt.hist(bus_cost['cost_per_bus'],density=True)\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1de41d29-4183-41a3-b6af-58e1d676f788",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "## PROJECT TRACKING SHEET DATA"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b29ced72-1fd8-40aa-95c0-178a4db4a36b",
- "metadata": {
- "tags": []
- },
- "source": [
- "### project tracking - read raw data\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "af4ead17-a1fc-4f42-9fbf-227ae511b930",
- "metadata": {},
- "outputs": [],
- "source": [
- "gcs_path = 'gs://calitp-analytics-data/data-analyses/bus_procurement_cost/'\n",
- "file_name = 'TIRCP Tracking Sheets 2_1-10-2024.xlsx'\n",
- "sheet_name = 'Project Tracking'\n",
- "\n",
- "def get_data(path, file, sheet):\n",
- " df = pd.read_excel(path+file, sheet_name=sheet)\n",
- " \n",
- " return df\n",
- "\n",
- "project = get_data(gcs_path, file_name, sheet_name)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "767d535b-84ce-4884-8145-390c7d38b879",
- "metadata": {},
- "outputs": [],
- "source": [
- "display(\n",
- " project.shape,\n",
- " project.columns,\n",
- " project.dtypes,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "2e9e142b-92aa-47c5-b97a-d83852dbc4a5",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "## Project Tracking- data cleaning"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "88b2f5ce-af56-4e9c-b854-9c31e0c6a853",
- "metadata": {
- "tags": []
- },
- "source": [
- "### data frame cleaning"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e6b9047c-eda3-4631-9a1c-3338abf6c281",
- "metadata": {},
- "outputs": [],
- "source": [
- "# only keep the first 20 columns\n",
- "# tircp = tircp.iloc[:, :12]\n",
- "project = project.iloc[:, :20]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "75c5cbb1-4f8c-4c7b-838b-1c0f139b8a0a",
- "metadata": {},
- "outputs": [],
- "source": [
- "list(project.columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6692bddd-e46d-415f-abeb-287578f15b74",
- "metadata": {},
- "outputs": [],
- "source": [
- "# drop specific columns\n",
- "drop_col = [\n",
- " \"Master Agreement Expiration Date\",\n",
- " \"Project Manager\",\n",
- " \"Regional Coordinator\",\n",
- " \"Technical Assistance-CALITP (Y/N)\",\n",
- " \"Technical Assistance-Fleet (Y/N)\",\n",
- " \"Technical Assistance-Network Integration (Y/N)\",\n",
- " \"Technical Assistance-Priority Population (Y/N)\",\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d2e692da-16fc-4f97-946f-a1e6fb3414e8",
- "metadata": {},
- "outputs": [],
- "source": [
- "project.drop(columns=drop_col, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e7b312e7-3676-415b-bc3c-8bf2d95694ee",
- "metadata": {},
- "outputs": [],
- "source": [
- "len(project.columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3a39fe55-6664-49ce-979e-e158ae27f795",
- "metadata": {},
- "outputs": [],
- "source": [
- "# replace space with _ & lower everything\n",
- "project.columns = project.columns.str.replace(\" \", \"_\")\n",
- "project.columns = project.columns.str.lower()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4937d3c4-f5df-4a42-84ea-161ec61e637b",
- "metadata": {},
- "outputs": [],
- "source": [
- "# check work\n",
- "project.columns"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "4372f730-9137-4e33-a45e-6563abdf4085",
- "metadata": {
- "tags": []
- },
- "source": [
- "### check columns\n",
- "Check the values of all columns for:\n",
- "- any duplicate values\n",
- "- invalid int/str values\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "83152dd0-e2a4-4892-9019-ceb28ce00f5f",
- "metadata": {},
- "outputs": [],
- "source": [
- "project.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b5f5a0d4-8616-44cf-82ab-08dcc0bc58c7",
- "metadata": {},
- "outputs": [],
- "source": [
- "# function to check column information\n",
- "def col_checker(col):\n",
- " display(\n",
- " f\"Displaying column: {col}\",\n",
- " len(project[col]),\n",
- " list(project[col].sort_values(ascending=True).unique()),\n",
- " )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "24b1a759-280a-4a2b-9900-327cdf073c59",
- "metadata": {},
- "outputs": [],
- "source": [
- "# col is OK, all numbers\n",
- "col_checker(\"tircp_award_amount_($)\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f912f6d4-4b1e-4868-9fe9-4e4c85124e9a",
- "metadata": {},
- "outputs": [],
- "source": [
- "# col is good, everything is a number\n",
- "col_checker(\"total_project_cost\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a584257f-455a-4939-9897-085e1c7f95f1",
- "metadata": {},
- "outputs": [],
- "source": [
- "# col is OK\n",
- "col_checker(\"master_agreement_number\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3d39123d-1e8b-458e-8b18-8113666de00f",
- "metadata": {},
- "outputs": [],
- "source": [
- "# col is OK\n",
- "col_checker(\"bus_count\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "022bba0f-73c7-4ae2-9986-ae234dd5517b",
- "metadata": {},
- "outputs": [],
- "source": [
- "# column is OK\n",
- "col_checker(\"project_description\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d77f19e7-87cb-4ae0-acd0-829addb935b6",
- "metadata": {},
- "outputs": [],
- "source": [
- "project[project[\"district\"] == \"VAR\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b1526686-6a92-4567-92df-0cf27c25ff01",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Project title OK,\n",
- "col_checker(\"project_title\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9dc1361f-6d1e-4067-8cf3-738f4fe9ad85",
- "metadata": {},
- "outputs": [],
- "source": [
- "# award year OK\n",
- "col_checker(\"award_year\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e50bb102-bdab-45f2-af8e-ccca23f25247",
- "metadata": {},
- "outputs": [],
- "source": [
- "# project num OK\n",
- "col_checker(\"project_#\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "92513e2d-8c76-4150-b4d0-f2034f3ce85c",
- "metadata": {},
- "source": [
- "---"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "94f27bed-11ce-480d-bcf5-5c2ed07b03a8",
- "metadata": {},
- "outputs": [],
- "source": [
- "# DROP COL\n",
- "# Col is OK\n",
- "col_checker(\"allocated_amount\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "55860953-f885-4e1d-ae02-f4d8d6be46bf",
- "metadata": {},
- "outputs": [],
- "source": [
- "# NEEDS CLEANING grant_recipient need to clean\n",
- "col_checker(\"grant_recipient\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c74990d4-bda0-4615-9494-832f7b44f3f3",
- "metadata": {},
- "outputs": [],
- "source": [
- "# may need to clean, there are rows that say '3, 4'\n",
- "col_checker(\"county\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "00cc4687-19d7-4af3-81f9-37e66cd6cb33",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Move to cleaning, check what is 'VAR'. various?\n",
- "# may be ok just check to make sure\n",
- "project.district.unique()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e71ef09e-c7e5-4cdb-8e48-1be0ad1fa087",
- "metadata": {},
- "outputs": [],
- "source": [
- "# couldn't run col_checker, guessing because some PPNO numbers are inconsistent\n",
- "# may need to clean, there is a ppno of CP052/CP053\n",
- "project.ppno.unique()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "3f4e1191-979e-4a31-bce3-296fabc586dc",
- "metadata": {
- "tags": []
- },
- "source": [
- "### dropping allocated amount column"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "209b153b-f42f-4a57-97c1-087e403fec55",
- "metadata": {},
- "outputs": [],
- "source": [
- "# dropping allocated amount column\n",
- "project.drop(columns=[\"allocated_amount\"], inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "16a7479b-707a-4621-906c-022598a64179",
- "metadata": {},
- "outputs": [],
- "source": [
- "# checking work\n",
- "project.columns"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7e8652ea-a95f-4009-b592-1414da775c61",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Clean `grant_recipient` column"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "77b1e0be-20ee-454e-8e8b-a6c0e0951d44",
- "metadata": {},
- "outputs": [],
- "source": [
- "list(project.grant_recipient.sort_values(ascending=True).unique())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e98893df-a0f5-4fbd-af6a-8fb58602a556",
- "metadata": {},
- "outputs": [],
- "source": [
- "agency_dict = {\n",
- " \"Antelope Valley Transit Authority \": \"Antelope Valley Transit Authority (AVTA)\",\n",
- " \"Humboldt Transit Authority\": \"Humboldt Transit Authority (HTA)\",\n",
- " \"Orange County Transportation Authority\": \"Orange County Transportation Authority (OCTA)\",\n",
- " \"Capitol Corridor Joint Powers Authority\": \"Capitol Corridor Joint Powers Authority (CCJPA)\",\n",
- " \"Los Angeles County Metropolitan Transportation Authority\": \"Los Angeles County Metropolitan Transportation Authority (LA Metro)\",\n",
- " \"Monterey-Salinas Transit\": \"Monterey-Salinas Transit District (MST)\",\n",
- " \"Sacramento Regional Transit (SacRT)\": \"Sacramento Regional Transit District (SacRT)\",\n",
- " \"Sacramento Regional Transit District\": \"Sacramento Regional Transit District (SacRT)\",\n",
- " \"Sacramento Regional Transit District (SacRT) \": \"Sacramento Regional Transit District (SacRT)\",\n",
- " \"San Diego Association of Governments\": \"San Diego Association of Governments (SANDAG)\",\n",
- " \"Santa Clara Valley Transportation Authority (SCVTA)\": \"Santa Clara Valley Transportation Authority (VTA)\",\n",
- " \"Southern California Regional Rail Authority (SCRRA)\": \"Southern California Regional Rail Authority (SCRRA - Metrolink)\",\n",
- " \"Southern California Regional Rail Authority\": \"Southern California Regional Rail Authority (SCRRA - Metrolink)\",\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "41eedd51-e052-4462-bf8f-e14bea1df7cc",
- "metadata": {},
- "outputs": [],
- "source": [
- "# df.replace({'bus_desc': new_dict}, inplace=True)\n",
- "project.replace({\"grant_recipient\": agency_dict}, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "195fb532-8154-4ca1-ae6a-81b16d6f031e",
- "metadata": {},
- "outputs": [],
- "source": [
- "# check work. looks good\n",
- "list(project[\"grant_recipient\"].sort_values().unique())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4acd9422-7cc0-4082-a87a-628187d1736f",
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "markdown",
- "id": "15660296-1348-4463-a962-ac22234c2f7e",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Cleaning `county` column"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "13635af7-3952-42f4-a272-c7a462ea1358",
- "metadata": {},
- "outputs": [],
- "source": [
- "col_checker(\"county\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4e3f6614-dc19-4011-a878-fcc0e8570b41",
- "metadata": {},
- "outputs": [],
- "source": [
- "#checking specific row with '3,4' as county\n",
- "project[project[\"county\"] == \"3, 4\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "1de28ea8-d063-4797-8111-2cf073e71e71",
- "metadata": {},
- "outputs": [],
- "source": [
- "# change county value from '3, 4' to 'VAR' like the other rows.\n",
- "project.at[3, \"county\"] = \"VAR\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "23170f02-1d66-4bb3-917c-9ca2ceaba627",
- "metadata": {},
- "outputs": [],
- "source": [
- "# check work\n",
- "project.iloc[3]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "52ae834d-72a2-4c51-8b78-a533a807d497",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Cleaning `district` column\n",
- "This is good as is, no cleaning required. All rows with a VAR district have VAR in county as well."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "39b424d0-7d59-4d7b-88f2-ffc5b80ae9c8",
- "metadata": {},
- "outputs": [],
- "source": [
- "#GTG\n",
- "project.district.unique()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3ad49c6e-c13a-4460-b214-8ddac6e24f29",
- "metadata": {},
- "outputs": [],
- "source": [
- "#GTG \n",
- "project[project[\"district\"] == \"VAR\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b7be5cfa-b593-4c26-849a-2d9c2f777f34",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Clean `ppno` column\n",
- "This should all be fine as is, no cleaning needed"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3611650f-aa4d-4cf8-8153-4c809c0e7240",
- "metadata": {},
- "outputs": [],
- "source": [
- "list(project.ppno.unique())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8c92f80b-5898-4ff7-b184-cdb804ff564a",
- "metadata": {},
- "outputs": [],
- "source": [
- "#GTG \n",
- "project[project[\"ppno\"] == \"CP052/CP053\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7f8dd43f-ebef-4a75-aa2f-63b1faf6f514",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Skim the project description column?\n",
- "double check to ensure bus count is accurate to what the description says?\n",
- "\n",
- "Saw that some rows mention procuring both zero and non-zero emission buses (count total buses in `bus_count` and use `VAR` for prop type and bus size?)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "163a339e-7564-455e-9477-b9df0914ef0c",
- "metadata": {},
- "outputs": [],
- "source": [
- "project[\n",
- " project[\"project_title\"]\n",
- " == \"ATN FAST (Family of Advanced Solutions for Transit): Revolutionizing Transit for a Global Audience\"\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d6c9992f-8a4f-403d-8bc8-f45b55575402",
- "metadata": {},
- "outputs": [],
- "source": [
- "# iloc check\n",
- "project.iloc[73]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "1527b7df-c3ca-4041-91bb-a44998da584e",
- "metadata": {},
- "outputs": [],
- "source": [
- "# update bus_count for the row(s) whose ppno is 'CP106'\n",
- "project.loc[project['ppno'] == 'CP106', 'bus_count'] = 42\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "74702a2f-0b82-47ae-81ae-f7c021394ef0",
- "metadata": {},
- "outputs": [],
- "source": [
- "# check work\n",
- "project.iloc[73]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1c6f0650-9ef5-46cd-a2b7-63265c793780",
- "metadata": {},
- "source": [
- "---"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "5140da93-1ee3-4bd8-8cd4-01745947ff48",
- "metadata": {},
- "source": [
- "## Export cleaned Project df "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "55872945-c6fa-4a98-b674-91f91d39d08f",
- "metadata": {},
- "outputs": [],
- "source": [
- "# export cleaned project df\n",
- "project.to_csv(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_clean.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a931b907-8431-4290-96dc-2e1b40b6e64f",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Read in cleaned project data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9b12c971-94d5-4ef8-93d3-2994df1826d3",
- "metadata": {},
- "outputs": [],
- "source": [
- "project = pd.read_csv(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_clean.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d5c9f3d1-9437-48a3-a37f-0d74860a1499",
- "metadata": {},
- "outputs": [],
- "source": [
- "# ensure df is able to read in\n",
- "display(project.shape, project.columns)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "51f95400-1198-46a6-a41a-fef99b3a2ffa",
- "metadata": {
- "tags": []
- },
- "source": [
- "### filter df for projects with a bus count greater than zero"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "fc9ee142-13b2-4fc2-86af-d712ab5df6c4",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_only = project[project[\"bus_count\"] > 0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c65b773e-dd41-4c95-8e6b-2132e5d7e978",
- "metadata": {},
- "outputs": [],
- "source": [
- "# this looks correct\n",
- "display(project.shape, bus_only.shape)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "fdd09938-88b1-4fcd-8159-1637a57ee0f4",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "## New column for propulsion type - `prop_type`\n",
- "Use on `bus_only` df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "630e1662-9a32-4a2c-8174-1a0dc59ad42e",
- "metadata": {},
- "outputs": [],
- "source": [
- "prop_type = [\n",
- " \"electric buses\",\n",
- " \"electric commuter\",\n",
- " \"Electric Buses\",\n",
- " \"battery electric\",\n",
- " \"Batery Electric\",\n",
- " \"battery-electric\",\n",
- " \"fuel-cell\",\n",
- " \"fuel cell\",\n",
- " \"Fuel Cell\",\n",
- " \"zero emission\",\n",
- " \"Zero Emission\",\n",
- " \"zero-emission electric buses\",\n",
- " \"zero-emission buses\",\n",
- " \"zero‐emission\",\n",
- " \"zero-emission\",\n",
- " \"zeroemission\",\n",
- " \"CNG\",\n",
- " \"cng\",\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9cb544f0-81e3-4644-a4f8-5b83009e3e18",
- "metadata": {},
- "outputs": [],
- "source": [
- "type(prop_type)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "fa958cba-0e59-464b-959f-c4c18b61f8cc",
- "metadata": {},
- "outputs": [],
- "source": [
- "# function to match keywords to list\n",
- "def prop_type_finder(description):\n",
- " for keyword in prop_type:\n",
- " if keyword in description:\n",
- " return keyword\n",
- " return \"not specified\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6c9f56c8-ee70-426c-ba62-2294b2b13fa7",
- "metadata": {},
- "outputs": [],
- "source": [
- "# add new col `prop_type`, fill it with values based on project_description using prop_type_finder function\n",
- "bus_only[\"prop_type\"] = bus_only[\"project_description\"].apply(prop_type_finder)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c35ceca0-1049-4c1f-b24c-569c45f97f5e",
- "metadata": {},
- "outputs": [],
- "source": [
- "# check work\n",
- "display(\n",
- " bus_only.columns,\n",
- " bus_only[\"prop_type\"].value_counts(),\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "251b72b3-7ab4-4028-8cad-1f39b4e334c0",
- "metadata": {},
- "outputs": [],
- "source": [
- "# exploring the not specified rows\n",
- "bus_only[bus_only[\"prop_type\"] == \"not specified\"]\n",
- "# coach-style buses, this row does not specify if buses are zero or non-zero emission bus. GOOD TO GO"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "bd6eda27-aa67-4a3c-b863-531fbb667da3",
- "metadata": {},
- "outputs": [],
- "source": [
- "# what is in CNG rows?\n",
- "bus_only[bus_only[\"prop_type\"] == \"CNG\"]\n",
- "# was 4 rows, then adjusted prop list to have cng at the bottom. now showing 1 row thats actually CNG"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8f056155-1a0f-4d7f-85af-ee6d96069eab",
- "metadata": {},
- "outputs": [],
- "source": [
- "# consolidate values\n",
- "list(bus_only[\"prop_type\"].sort_values(ascending=True).unique())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8964ac4d-d7c8-457c-98b0-577d6e5e30ef",
- "metadata": {},
- "outputs": [],
- "source": [
- "prop_dict = {\n",
- " \"battery electric\": \"BEB\",\n",
- " \"battery-electric\": \"BEB\",\n",
- " \"electric buses\": \"electric (not specified)\",\n",
- " \"electric commuter\": \"electric (not specified)\",\n",
- " \"fuel cell\": \"FCEB\",\n",
- " \"fuel-cell\": \"FCEB\",\n",
- " \"zero-emission buses\": \"zero-emission bus (not specified)\",\n",
- " \"zero emission\": \"zero-emission bus (not specified)\",\n",
- " \"zero-emission\": \"zero-emission bus (not specified)\",\n",
- " \"zero‐emission\": \"zero-emission bus (not specified)\",\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "1204ed35-8e16-4a14-9c41-8c22b24a1503",
- "metadata": {},
- "outputs": [],
- "source": [
- "# replacing prop_type values with dictionary\n",
- "bus_only.replace({\"prop_type\": prop_dict}, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d2da1c1b-c91b-42e5-b3ff-583f3fd60676",
- "metadata": {},
- "outputs": [],
- "source": [
- "# check work\n",
- "display(bus_only.prop_type.value_counts(), bus_only.head())\n",
- "\n",
- "# looks good"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "392dd768-88e4-42bb-a26f-ab95e225b271",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "## New column for bus size type - `bus_size_type`\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "278079a5-9ebb-4ac2-9363-ecfc0b6a818f",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_size = [\n",
- " \"standard\",\n",
- " \"30-foot\",\n",
- " \"40 foot\",\n",
- " \"40-foot\",\n",
- " \"45-foot\",\n",
- " \"45 foot\",\n",
- " \"40ft\",\n",
- " \"60-foot\",\n",
- " \"articulated\",\n",
- " \"cutaway\",\n",
- " \"coach-style\",\n",
- " \"over-the-road\",\n",
- " \"feeder bus\",\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9988791c-8ec6-45c4-b7b6-4fa8ccbfe823",
- "metadata": {},
- "outputs": [],
- "source": [
- "type(bus_size)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "71b25adf-440a-4f52-9d18-f908f57d5aab",
- "metadata": {},
- "outputs": [],
- "source": [
- "# re writing prop type funct for bus size\n",
- "def bus_size_finder(description):\n",
- " for keyword in bus_size:\n",
- " if keyword in description:\n",
- " return keyword\n",
- " return \"not specified\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "45ab8fdc-e752-4584-8bb8-af4426861a71",
- "metadata": {},
- "outputs": [],
- "source": [
- "# creating new column, filling the column using the function applied to project_description\n",
- "bus_only[\"bus_size_type\"] = bus_only[\"project_description\"].apply(bus_size_finder)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c0ce4822-7356-4e52-94cb-04269eb82db9",
- "metadata": {},
- "outputs": [],
- "source": [
- "# checking work\n",
- "display(bus_only.columns, bus_only.bus_size_type.value_counts())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a17a1dfa-d70a-4076-b9c1-618a43398262",
- "metadata": {},
- "outputs": [],
- "source": [
- "list(bus_only['bus_size_type'].sort_values().unique())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ad0bda67-e3b2-4bdf-9bfb-0afa7b19bc0d",
- "metadata": {},
- "outputs": [],
- "source": [
- "# expected that not a lot of rows specify a size type.\n",
- "# will still take a random peek into some\n",
- "\n",
- "bus_only[bus_only[\"bus_size_type\"] == \"not specified\"].sample(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ab06f71f-3f64-4b12-8164-fe3524c97d64",
- "metadata": {},
- "outputs": [],
- "source": [
- "# consolidate\n",
- "size_dict={'40 foot': 'conventional (40-ft like)' ,\n",
- " '40-foot': 'conventional (40-ft like)',\n",
- " '45-foot': 'conventional (40-ft like)',\n",
- " 'coach-style':'over-the-road',\n",
- " 'feeder bus': 'conventional (40-ft like)',\n",
- " }"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "be465cfd-fb25-4862-a709-c3dd675a8fbf",
- "metadata": {},
- "outputs": [],
- "source": [
- "type(size_dict)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d66297eb-2ced-4148-9722-167797349280",
- "metadata": {},
- "outputs": [],
- "source": [
- "# .replace() with size_dict to replace values in bus size col\n",
- "bus_only.replace({\"bus_size_type\": size_dict}, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "055fbee5-b73b-4779-801b-1c0d23e081af",
- "metadata": {},
- "outputs": [],
- "source": [
- "# check work\n",
- "bus_only.bus_size_type.value_counts()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1b95f567-13fc-4184-8b09-f512f702f3f0",
- "metadata": {
- "tags": []
- },
- "source": [
- "## export project- bus only df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "cecea030-b37a-4170-9997-656e8bd0c080",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_only.to_parquet(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_bus_only.parquet\"\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1ea926da-8d76-481c-a5fb-ef39606f45ca",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Read in project bus only data\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "abaf10b2-0dc3-432d-845a-8dacb2af806f",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_checker = pd.read_parquet(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_bus_only.parquet\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "ab4b7cf3-183e-4ea1-baca-e3f5d0bd9dd6",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(35, 14)"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "bus_checker.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "9000c886-c46c-436d-a0b5-ec55bd2a4cd2",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_checker = bus_checker.assign(\n",
- " project_type = bus_checker['project_description'].apply(project_type_checker)\n",
- ")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "bcdeb08c-50bf-4a63-b2c8-086cdfa96ffe",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "includes bus and non-bus components 24\n",
- "bus only 11\n",
- "Name: project_type, dtype: int64"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "bus_checker[\"project_type\"].value_counts()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "fb93fbc9-ab8d-4239-a55e-3ec2b3000541",
- "metadata": {},
- "outputs": [],
- "source": [
- "# just_bus rows are all good. \n",
- "just_bus = bus_checker[bus_checker['project_type'] == \"bus only\"]\n",
- "\n",
- "# bus_non_bus rows are all good\n",
- "bus_non_bus = bus_checker[bus_checker['project_type'] == \"includes bus and non-bus components\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "b88e65bc-86d2-419e-93d6-e3f066173b2b",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
project_description
\n",
- "
project_type
\n",
- "
bus_count
\n",
- "
prop_type
\n",
- "
\n",
- " \n",
- " \n",
- "
\n",
- "
0
\n",
- "
Purchase 13 60-foot articulated BRT buses and 16 45-foot electric commuter buses
\n",
- "
bus only
\n",
- "
13.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
5
\n",
- "
Purchase five 40-foot CNG buses for BRT Route linking SARTC to Metrolink/Amtrak
\n",
- "
bus only
\n",
- "
40.0
\n",
- "
CNG
\n",
- "
\n",
- "
\n",
- "
16
\n",
- "
Purchase 20 zero-emission buses to extend Route 486 to the Pamona Metrolink station and increase frequencies
\n",
- "
bus only
\n",
- "
20.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
34
\n",
- "
Acquire 112 zero-emission buses to replace existing propane vehicles and add new vehicles, in order to increase frequency of all existing DASH routes to 15-minute service and add 4 new routes, serving communities throughout the City of Los Angeles as recommended in the comprehensive Transit Service Analysis.
\n",
- "
bus only
\n",
- "
112.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
51
\n",
- "
Purchase 7 new coach-style buses to support a new intercity service that connects Redding to Sacramento. Purchase 7 new coach-style buses to support a new intercity service that connects Redding to Sacramento Shata Regional Transportation Agency Bus System to Sacramento International Airport
\n",
- "
bus only
\n",
- "
14.0
\n",
- "
not specified
\n",
- "
\n",
- "
\n",
- "
68
\n",
- "
Purchase 7 zero emission buses to enhance and extend Route 14 from Playa Vista to Inglewood, bringing new transit opportunities to disadvantaged communities, while also integrating light rail and bus services.
\n",
- "
bus only
\n",
- "
7.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
70
\n",
- "
Purchase 7 electric buses to expand services on Line 4X (between Torrance and Downton LA), on an extended line 10 (serving the Metro Green Line Crenshaw station and the Inglewood Stadium and Entertainment District, an extended line 9 (newly serving the Kaiser Permanente South Bay Medical Center), and the acquisition of the western portion of LA Metro’s Route 130 between the Blue Line Artesia Station and the South Bay Galleria Mall.
\n",
- "
bus only
\n",
- "
7.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
71
\n",
- "
Purchases 3 zero-emission electric buses to increase fleet size and extend bus service levels on 2 fixed routes in Merced county. The proposed project allows for an expansion of service frequency on one existing inter-community route connecting rural communities to the city of Merced. The route currently operates on limited frequency and is not enough to keep up with existing demand. The project also expands local service on one local route to provide better bus service to a developed residential area currently with limited access to service.
\n",
- "
bus only
\n",
- "
3.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
81
\n",
- "
Purchase of 3 zero-emission buses that will support Wasco's local Dia-a-Ride shuttle services to expand service to affordable housing projects and expand overall service availability by 50%.
\n",
- "
bus only
\n",
- "
3.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
95
\n",
- "
Supports the phased development of an east-west Cross Valley Corridor by purchasing 14 zero-emission feeder buses in multiple cities in and along the corridor (as well as 16 micro-transit vehicles to be operated in selected cities) that will provide comprehensive access to the future rail system for all these communities and will connect to the California High Speed Rail system.
\n",
- "
bus only
\n",
- "
14.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
103
\n",
- "
The Project implements a new transit service using electric minibuses to serve underserved communities and includes purchasing 5 zero‐emission buses. Also establishes an all‐day and late‐night micro‐transit service in the Downtown area of Culver City and includes procuring 5 vehicles to operate the service
\n",
- "
bus only
\n",
- "
5.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " project_description \\\n",
- "0 Purchase 13 60-foot articulated BRT buses and 16 45-foot electric commuter buses \n",
- "5 Purchase five 40-foot CNG buses for BRT Route linking SARTC to Metrolink/Amtrak \n",
- "16 Purchase 20 zero-emission buses to extend Route 486 to the Pamona Metrolink station and increase frequencies \n",
- "34 Acquire 112 zero-emission buses to replace existing propane vehicles and add new vehicles, in order to increase frequency of all existing DASH routes to 15-minute service and add 4 new routes, serving communities throughout the City of Los Angeles as recommended in the comprehensive Transit Service Analysis. \n",
- "51 Purchase 7 new coach-style buses to support a new intercity service that connects Redding to Sacramento. Purchase 7 new coach-style buses to support a new intercity service that connects Redding to Sacramento Shata Regional Transportation Agency Bus System to Sacramento International Airport \n",
- "68 Purchase 7 zero emission buses to enhance and extend Route 14 from Playa Vista to Inglewood, bringing new transit opportunities to disadvantaged communities, while also integrating light rail and bus services. \n",
- "70 Purchase 7 electric buses to expand services on Line 4X (between Torrance and Downton LA), on an extended line 10 (serving the Metro Green Line Crenshaw station and the Inglewood Stadium and Entertainment District, an extended line 9 (newly serving the Kaiser Permanente South Bay Medical Center), and the acquisition of the western portion of LA Metro’s Route 130 between the Blue Line Artesia Station and the South Bay Galleria Mall. \n",
- "71 Purchases 3 zero-emission electric buses to increase fleet size and extend bus service levels on 2 fixed routes in Merced county. The proposed project allows for an expansion of service frequency on one existing inter-community route connecting rural communities to the city of Merced. The route currently operates on limited frequency and is not enough to keep up with existing demand. The project also expands local service on one local route to provide better bus service to a developed residential area currently with limited access to service. \n",
- "81 Purchase of 3 zero-emission buses that will support Wasco's local Dia-a-Ride shuttle services to expand service to affordable housing projects and expand overall service availability by 50%. \n",
- "95 Supports the phased development of an east-west Cross Valley Corridor by purchasing 14 zero-emission feeder buses in multiple cities in and along the corridor (as well as 16 micro-transit vehicles to be operated in selected cities) that will provide comprehensive access to the future rail system for all these communities and will connect to the California High Speed Rail system. \n",
- "103 The Project implements a new transit service using electric minibuses to serve underserved communities and includes purchasing 5 zero‐emission buses. Also establishes an all‐day and late‐night micro‐transit service in the Downtown area of Culver City and includes procuring 5 vehicles to operate the service \n",
- "\n",
- " project_type bus_count prop_type \n",
- "0 bus only 13.0 electric (not specified) \n",
- "5 bus only 40.0 CNG \n",
- "16 bus only 20.0 zero-emission bus (not specified) \n",
- "34 bus only 112.0 zero-emission bus (not specified) \n",
- "51 bus only 14.0 not specified \n",
- "68 bus only 7.0 zero-emission bus (not specified) \n",
- "70 bus only 7.0 electric (not specified) \n",
- "71 bus only 3.0 electric (not specified) \n",
- "81 bus only 3.0 zero-emission bus (not specified) \n",
- "95 bus only 14.0 zero-emission bus (not specified) \n",
- "103 bus only 5.0 zero-emission bus (not specified) "
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "just_bus[[\"project_description\", \"project_type\", \"bus_count\", \"prop_type\"]]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "4b4db842-cf70-4c44-9e04-a2599d162df9",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
project_description
\n",
- "
project_type
\n",
- "
bus_count
\n",
- "
prop_type
\n",
- "
\n",
- " \n",
- " \n",
- "
\n",
- "
11
\n",
- "
Bus rapid transit infrastructure along the MLK Corridor and Crosstown Miner Corridor, including the acquisition of 12 new zero-emission electric vehicles
\n",
- "
includes bus and non-bus components
\n",
- "
12.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
29
\n",
- "
Deploys 40 zero-emission electric buses to double service levels on up to 8 routes, add 2 new routes; Implements a new circulator/on-demand first-mile/last-mile service; and construction of a new maintenance facility with solar canopy structures.
\n",
- "
includes bus and non-bus components
\n",
- "
40.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
30
\n",
- "
Deploys 7 zero-emission battery electric buses and upgrades charging infrastructure serving AVTA local and commuter bus routes, bringing the entire AVTA system to fully electric status (the first in the nation) by 2019; Deploys 5 zero-emission battery electric buses and related infrastructure for Long Beach Transit services. Increased frequency on up to 5 local and community transit routes operated by LBT.
\n",
- "
includes bus and non-bus components
\n",
- "
7.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
33
\n",
- "
Purchase of 6 zero-emission battery-electric buses and the construction of charging infrastructure to allow extension of 15-min service connecting Southwest Fresno to the northern part of Fresno and creating a new route providing access to job centers.
\n",
- "
includes bus and non-bus components
\n",
- "
6.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
35
\n",
- "
Construction- Purchase 10- 40 foot battery electric buses
\n",
- "
includes bus and non-bus components
\n",
- "
10.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
52
\n",
- "
Purchases 13 electric buses and funds capital improvements including new bus stops, pedestrian crossings, and charging infrastructure
\n",
- "
includes bus and non-bus components
\n",
- "
13.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
56
\n",
- "
Purchase of 11 zero emission battery electric buses and supportive charging infrastructure to allow for expansion of the zero-emission bus fleet and implement a new zero-emission microtransit service that is fully integrated into local and regional intermodal transit networks.
\n",
- "
includes bus and non-bus components
\n",
- "
11.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
60
\n",
- "
Construction of a new transit center in Clearlake and purchase 4 hydrogen fuel-cell buses with associated infrastructure. The project would expand service to out of county destinations, including the Sonoma County Airport and the Santa Rosa Bus Terminal in Downtown Santa Rosa. Hydrogen fuel cell technology is used in order to allow extended range services to be operated, contributing to increased ridership.
\n",
- "
includes bus and non-bus components
\n",
- "
4.0
\n",
- "
FCEB
\n",
- "
\n",
- "
\n",
- "
61
\n",
- "
Purchase of 5 zero-emission battery- electric buses and the construction of charging infrastructure to create a zero-emission over-the-road coach commuter route between the Greater Long Beach area and the University of California, Los Angeles (UCLA).
\n",
- "
includes bus and non-bus components
\n",
- "
5.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
73
\n",
- "
Creates a zero-emission transit ecosystem that offers end-to-end solutions for residents, employees and the global audience drawn by tourism/convention centers and the LA 2028 Summer Olympics events. Project components include (1) purchase of 7 zero-emission battery electric vans to implement a new service connecting John Wayne Airport to Anaheim, (2) purchase of 10 electric vehicles and associated infrastructure to expand on-demand micro transit services into new neighborhoods and service areas, (3) purchase of 15 zeroemission buses to replace existing buses and augment existing routes, including installation of photovoltaic electricity generation at two facilities, and (4) purchase of 10 additional zero-emission buses for a new east/west connector service.
\n",
- "
includes bus and non-bus components
\n",
- "
42.0
\n",
- "
BEB
\n",
- "
\n",
- "
\n",
- "
74
\n",
- "
Purchase of 6 zero emission microtransit buses to augment existing microtransit services and expand the service area, purchase 6 zeroemission school buses (in partnership with the Antelope Valley School Transportation Agency), and implements associated charging infrastructure.
\n",
- "
includes bus and non-bus components
\n",
- "
12.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
78
\n",
- "
Project purchases 27 battery-electric busses for replacement and expansion, allowing for reduced headway service on two routes and a new route to connect the Glendale Transportation Center with Glendale Community College, and completing the transition to zeroemission for the Arroyo Verdugo Transit Operators, serving Glendale, La Cañada Flintridge, La Crescenta and Montrose. Also includes design and construction of a new parking deck to accommodate associated infrastructure and a photovoltaic (solar) canopy. Lastly, it provides upgrades to 400 bus stops, contactless payment options and a new smart phone application for riders.
\n",
- "
includes bus and non-bus components
\n",
- "
27.0
\n",
- "
BEB
\n",
- "
\n",
- "
\n",
- "
80
\n",
- "
Purchase of 10 zero-emission electric buses and associated charging infrastructure to replace CNG and gas buses and implement service expansion between the City of Torrance and Downtown Los Angeles. Buses will be deployed in more frequent service on key routes, including services that will take advantage of bus priority lanes
\n",
- "
includes bus and non-bus components
\n",
- "
10.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
84
\n",
- "
Procure 11 hydrogen fuel cell electric buses, design and install a hydrogen fueling station to provide fuel for the buses and for private and other fleet vehicles, and design and construct an intermodal transit and housing center. The buses will serve the local Trinidad to Scotia route as well as a new intercity route to Ukiah, connecting riders to Mendocino County and south to the SMART train and the San Francisco Bay Area. The hydrogen station and transit and housing center will both be located in low-income census tracts in downtown Eureka, the Humboldt County seat and largest city.
\n",
- "
includes bus and non-bus components
\n",
- "
11.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
85
\n",
- "
Purchases 261 zero emission buses and supportive infrastructure to deploy on LA Metro's Tier 1 and Tier 2 routes from Divisions 9 and 18, as well as the J (Silver) Line. Project also includes corridor improvements on high frequency bus corridors (many shared with other transit agencies), including bus-only lanes, transit signal priority, bus bulbs and boarding islands, bus shelters, and real-time passenger information.
\n",
- "
includes bus and non-bus components
\n",
- "
261.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
92
\n",
- "
Purchases eight battery-electric buses and 3 electric microtransit vans, continuing fleet conversion and allowing expansion of microtransit service into additional zones serving the City of Goleta, UC Santa Barbara, and the Goleta rail station. Funds general transit improvements including signal priority, contactless payment deployment, additional bike racks, and bus shelter improvements, and constructs facility improvements at two terminals including the construction of new ZEB infrastructure.
\n",
- "
includes bus and non-bus components
\n",
- "
8.0
\n",
- "
electric (not specified)
\n",
- "
\n",
- "
\n",
- "
93
\n",
- "
Includes the purchase of 30 zero-emission buses and associated charging infrastructure and passenger amenities for Petaluma Transit, Santa Rosa CityBus and Sonoma County Transit, construction of the SMART Petaluma North commuter rail station, and improved network integration among all application partners and other transit operators in Sonoma County, including contactless payment equipment for Mendocino Transit Authority
\n",
- "
includes bus and non-bus components
\n",
- "
30.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
99
\n",
- "
Purchases 40 zero-emission buses and associated infrastructure and implements a set of interrelated transit improvements. Includes service optimization improvements such as transit signal priority and other corridor improvements, installation of fare payment validators, and onboard passenger amenities.
\n",
- "
includes bus and non-bus components
\n",
- "
40.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
101
\n",
- "
The Project expands frequency on two high‐performing routes and fully delivers phases 2‐4 of the City's bus charging infrastructure plan. This includes the implementation of a new vehicle charging system and utility upgrades, including construction of a charging canopy, to support fleet electrification efforts; and purchase up to 103 zero‐emission buses to replace existing CNG buses (which includes a mix of 30‐foot, 40‐foot and 60‐foot vehicles).
\n",
- "
includes bus and non-bus components
\n",
- "
103.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
102
\n",
- "
The Project constructs a new transit center, linking city bus routes with regional routes, including to regional rail service; constructs a park‐and‐ride lot at the proposed new transit center; purchases 6 zero‐emission buses to replace existing CNG buses that will allow the applicant to operate three routes with zero‐emission buses.
\n",
- "
includes bus and non-bus components
\n",
- "
6.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
105
\n",
- "
This project deploys 4 zero‐emission buses in Tribal and low‐income population regions, constructs a new transit center in Crescent City, expand existing intercity routes under a unified Redwood Coast Express brand to reduce transfers and increase ridership, and accelerate statewide efforts to develop a prototype fuel cell electric over‐the‐road coach (procuring 1 demonstration coach)
\n",
- "
includes bus and non-bus components
\n",
- "
4.0
\n",
- "
FCEB
\n",
- "
\n",
- "
\n",
- "
110
\n",
- "
The Project includes a set of interrelated investments with five components, including: Purchasing 33 expansion zero‐emission buses and associated infrastructure to expand service; Replacing 10 gas‐powered paratransit vehicles with zero‐emission vehicles to support an existing paratransit bus pilot effort; Implementation of transit signal priority along a key 12‐mile corridor; Installation of bicycle lockers and fast charging stations at targeted transit stations.
\n",
- "
includes bus and non-bus components
\n",
- "
33.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
117
\n",
- "
Purchases 24 zero-emission buses to expand service frequency on the heavily traversed Highways 1 and Highway 17 corridors while also redeveloping the Watsonville Transit Station and Pacific Station to include more than 180 mixed‐use, affordable housing units and implements real time passenger information, bicycle amenities and other multimodal improvements.
\n",
- "
includes bus and non-bus components
\n",
- "
24.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- "
\n",
- "
123
\n",
- "
Constructs a new transit facility that can support a growing zero‐emission vehicle fleet and purchases 15 zero‐emission buses to implement a new on‐demand local bus service in five newly formed transit service zones within Yuba and Sutter counties. Also includes 1 zero‐emission over‐the‐road coach to implement a new commuter service to Roseville.
\n",
- "
includes bus and non-bus components
\n",
- "
16.0
\n",
- "
zero-emission bus (not specified)
\n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " project_description \\\n",
- "11 Bus rapid transit infrastructure along the MLK Corridor and Crosstown Miner Corridor, including the acquisition of 12 new zero-emission electric vehicles \n",
- "29 Deploys 40 zero-emission electric buses to double service levels on up to 8 routes, add 2 new routes; Implements a new circulator/on-demand first-mile/last-mile service; and construction of a new maintenance facility with solar canopy structures. \n",
- "30 Deploys 7 zero-emission battery electric buses and upgrades charging infrastructure serving AVTA local and commuter bus routes, bringing the entire AVTA system to fully electric status (the first in the nation) by 2019; Deploys 5 zero-emission battery electric buses and related infrastructure for Long Beach Transit services. Increased frequency on up to 5 local and community transit routes operated by LBT. \n",
- "33 Purchase of 6 zero-emission battery-electric buses and the construction of charging infrastructure to allow extension of 15-min service connecting Southwest Fresno to the northern part of Fresno and creating a new route providing access to job centers. \n",
- "35 Construction- Purchase 10- 40 foot battery electric buses \n",
- "52 Purchases 13 electric buses and funds capital improvements including new bus stops, pedestrian crossings, and charging infrastructure \n",
- "56 Purchase of 11 zero emission battery electric buses and supportive charging infrastructure to allow for expansion of the zero-emission bus fleet and implement a new zero-emission microtransit service that is fully integrated into local and regional intermodal transit networks. \n",
- "60 Construction of a new transit center in Clearlake and purchase 4 hydrogen fuel-cell buses with associated infrastructure. The project would expand service to out of county destinations, including the Sonoma County Airport and the Santa Rosa Bus Terminal in Downtown Santa Rosa. Hydrogen fuel cell technology is used in order to allow extended range services to be operated, contributing to increased ridership. \n",
- "61 Purchase of 5 zero-emission battery- electric buses and the construction of charging infrastructure to create a zero-emission over-the-road coach commuter route between the Greater Long Beach area and the University of California, Los Angeles (UCLA). \n",
- "73 Creates a zero-emission transit ecosystem that offers end-to-end solutions for residents, employees and the global audience drawn by tourism/convention centers and the LA 2028 Summer Olympics events. Project components include (1) purchase of 7 zero-emission battery electric vans to implement a new service connecting John Wayne Airport to Anaheim, (2) purchase of 10 electric vehicles and associated infrastructure to expand on-demand micro transit services into new neighborhoods and service areas, (3) purchase of 15 zeroemission buses to replace existing buses and augment existing routes, including installation of photovoltaic electricity generation at two facilities, and (4) purchase of 10 additional zero-emission buses for a new east/west connector service. \n",
- "74 Purchase of 6 zero emission microtransit buses to augment existing microtransit services and expand the service area, purchase 6 zeroemission school buses (in partnership with the Antelope Valley School Transportation Agency), and implements associated charging infrastructure. \n",
- "78 Project purchases 27 battery-electric busses for replacement and expansion, allowing for reduced headway service on two routes and a new route to connect the Glendale Transportation Center with Glendale Community College, and completing the transition to zeroemission for the Arroyo Verdugo Transit Operators, serving Glendale, La Cañada Flintridge, La Crescenta and Montrose. Also includes design and construction of a new parking deck to accommodate associated infrastructure and a photovoltaic (solar) canopy. Lastly, it provides upgrades to 400 bus stops, contactless payment options and a new smart phone application for riders. \n",
- "80 Purchase of 10 zero-emission electric buses and associated charging infrastructure to replace CNG and gas buses and implement service expansion between the City of Torrance and Downtown Los Angeles. Buses will be deployed in more frequent service on key routes, including services that will take advantage of bus priority lanes \n",
- "84 Procure 11 hydrogen fuel cell electric buses, design and install a hydrogen fueling station to provide fuel for the buses and for private and other fleet vehicles, and design and construct an intermodal transit and housing center. The buses will serve the local Trinidad to Scotia route as well as a new intercity route to Ukiah, connecting riders to Mendocino County and south to the SMART train and the San Francisco Bay Area. The hydrogen station and transit and housing center will both be located in low-income census tracts in downtown Eureka, the Humboldt County seat and largest city. \n",
- "85 Purchases 261 zero emission buses and supportive infrastructure to deploy on LA Metro's Tier 1 and Tier 2 routes from Divisions 9 and 18, as well as the J (Silver) Line. Project also includes corridor improvements on high frequency bus corridors (many shared with other transit agencies), including bus-only lanes, transit signal priority, bus bulbs and boarding islands, bus shelters, and real-time passenger information. \n",
- "92 Purchases eight battery-electric buses and 3 electric microtransit vans, continuing fleet conversion and allowing expansion of microtransit service into additional zones serving the City of Goleta, UC Santa Barbara, and the Goleta rail station. Funds general transit improvements including signal priority, contactless payment deployment, additional bike racks, and bus shelter improvements, and constructs facility improvements at two terminals including the construction of new ZEB infrastructure. \n",
- "93 Includes the purchase of 30 zero-emission buses and associated charging infrastructure and passenger amenities for Petaluma Transit, Santa Rosa CityBus and Sonoma County Transit, construction of the SMART Petaluma North commuter rail station, and improved network integration among all application partners and other transit operators in Sonoma County, including contactless payment equipment for Mendocino Transit Authority \n",
- "99 Purchases 40 zero-emission buses and associated infrastructure and implements a set of interrelated transit improvements. Includes service optimization improvements such as transit signal priority and other corridor improvements, installation of fare payment validators, and onboard passenger amenities. \n",
- "101 The Project expands frequency on two high‐performing routes and fully delivers phases 2‐4 of the City's bus charging infrastructure plan. This includes the implementation of a new vehicle charging system and utility upgrades, including construction of a charging canopy, to support fleet electrification efforts; and purchase up to 103 zero‐emission buses to replace existing CNG buses (which includes a mix of 30‐foot, 40‐foot and 60‐foot vehicles). \n",
- "102 The Project constructs a new transit center, linking city bus routes with regional routes, including to regional rail service; constructs a park‐and‐ride lot at the proposed new transit center; purchases 6 zero‐emission buses to replace existing CNG buses that will allow the applicant to operate three routes with zero‐emission buses. \n",
- "105 This project deploys 4 zero‐emission buses in Tribal and low‐income population regions, constructs a new transit center in Crescent City, expand existing intercity routes under a unified Redwood Coast Express brand to reduce transfers and increase ridership, and accelerate statewide efforts to develop a prototype fuel cell electric over‐the‐road coach (procuring 1 demonstration coach) \n",
- "110 The Project includes a set of interrelated investments with five components, including: Purchasing 33 expansion zero‐emission buses and associated infrastructure to expand service; Replacing 10 gas‐powered paratransit vehicles with zero‐emission vehicles to support an existing paratransit bus pilot effort; Implementation of transit signal priority along a key 12‐mile corridor; Installation of bicycle lockers and fast charging stations at targeted transit stations. \n",
- "117 Purchases 24 zero-emission buses to expand service frequency on the heavily traversed Highways 1 and Highway 17 corridors while also redeveloping the Watsonville Transit Station and Pacific Station to include more than 180 mixed‐use, affordable housing units and implements real time passenger information, bicycle amenities and other multimodal improvements. \n",
- "123 Constructs a new transit facility that can support a growing zero‐emission vehicle fleet and purchases 15 zero‐emission buses to implement a new on‐demand local bus service in five newly formed transit service zones within Yuba and Sutter counties. Also includes 1 zero‐emission over‐the‐road coach to implement a new commuter service to Roseville. \n",
- "\n",
- " project_type bus_count \\\n",
- "11 includes bus and non-bus components 12.0 \n",
- "29 includes bus and non-bus components 40.0 \n",
- "30 includes bus and non-bus components 7.0 \n",
- "33 includes bus and non-bus components 6.0 \n",
- "35 includes bus and non-bus components 10.0 \n",
- "52 includes bus and non-bus components 13.0 \n",
- "56 includes bus and non-bus components 11.0 \n",
- "60 includes bus and non-bus components 4.0 \n",
- "61 includes bus and non-bus components 5.0 \n",
- "73 includes bus and non-bus components 42.0 \n",
- "74 includes bus and non-bus components 12.0 \n",
- "78 includes bus and non-bus components 27.0 \n",
- "80 includes bus and non-bus components 10.0 \n",
- "84 includes bus and non-bus components 11.0 \n",
- "85 includes bus and non-bus components 261.0 \n",
- "92 includes bus and non-bus components 8.0 \n",
- "93 includes bus and non-bus components 30.0 \n",
- "99 includes bus and non-bus components 40.0 \n",
- "101 includes bus and non-bus components 103.0 \n",
- "102 includes bus and non-bus components 6.0 \n",
- "105 includes bus and non-bus components 4.0 \n",
- "110 includes bus and non-bus components 33.0 \n",
- "117 includes bus and non-bus components 24.0 \n",
- "123 includes bus and non-bus components 16.0 \n",
- "\n",
- " prop_type \n",
- "11 zero-emission bus (not specified) \n",
- "29 electric (not specified) \n",
- "30 electric (not specified) \n",
- "33 electric (not specified) \n",
- "35 electric (not specified) \n",
- "52 electric (not specified) \n",
- "56 electric (not specified) \n",
- "60 FCEB \n",
- "61 electric (not specified) \n",
- "73 BEB \n",
- "74 zero-emission bus (not specified) \n",
- "78 BEB \n",
- "80 electric (not specified) \n",
- "84 electric (not specified) \n",
- "85 zero-emission bus (not specified) \n",
- "92 electric (not specified) \n",
- "93 zero-emission bus (not specified) \n",
- "99 zero-emission bus (not specified) \n",
- "101 zero-emission bus (not specified) \n",
- "102 zero-emission bus (not specified) \n",
- "105 FCEB \n",
- "110 zero-emission bus (not specified) \n",
- "117 zero-emission bus (not specified) \n",
- "123 zero-emission bus (not specified) "
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "bus_non_bus[[\"project_description\", \"project_type\", \"bus_count\", \"prop_type\"]]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c0aa374e-985b-46b3-a7ab-f3bc66e36204",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "## DEPRECATED - Data Analysis\n",
- "see `cost_per_bus_analysis` notebook"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "02cce57d-f82c-4f85-be53-814538b6b6c3",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Consolidate up grant recipient name"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a5d0e920-cfd9-465f-89eb-81214b27070a",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true,
- "tags": []
- },
- "source": [
- "### aggregate up"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a0a396f0-c9ad-48bd-9767-45b5a8b53d25",
- "metadata": {},
- "outputs": [],
- "source": [
- "# aggregate # of buses and allocation by transit agency\n",
- "# bus_cost = only_bus.groupby('grant_recipient').agg({\n",
- "# '#_of_buses':\"sum\",\n",
- "# 'allocation_amount':'sum'\n",
- "# }).reset_index()\n",
- "\n",
- "bus_cost = (\n",
- " bus_only.groupby(\"grant_recipient\")\n",
- " .agg({\"bus_count\": \"sum\", \"tircp_award_amount_($)\": \"sum\"})\n",
- " .reset_index()\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c5c8dfa3-310f-47bd-8d95-2eed37feec73",
- "metadata": {},
- "outputs": [],
- "source": [
- "# confirm aggregation worked\n",
- "bus_cost"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0f074183-9110-41fc-820b-4483fe9b076b",
- "metadata": {
- "tags": []
- },
- "source": [
- "### create new cost per bus column"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "391fdd1a-585b-43e4-b70b-18c7f54a8263",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_cost[\"cost_per_bus\"] = (\n",
- " bus_cost[\"tircp_award_amount_($)\"] / bus_cost[\"bus_count\"]\n",
- ").astype(\"int64\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3219820e-0d80-4c1b-92c5-f098f09a22a9",
- "metadata": {},
- "outputs": [],
- "source": [
- "# confirm new column was created and values were populated\n",
- "bus_cost.sort_values(\"cost_per_bus\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7c2df629-863e-476f-8f1c-934535a1feb0",
- "metadata": {},
- "source": [
- "### Export cost per bus via project tracking sheet to gcs"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b7ad9fe7-a705-4138-8f3d-138f6d0146f6",
- "metadata": {},
- "outputs": [],
- "source": [
- "bus_cost.to_csv(\n",
- " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_cost_per_bus.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b6946ba9-55e8-4e53-9da9-56310e9c3661",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.13"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/bus_procurement_cost/tircp_data_cleaner.py b/bus_procurement_cost/tircp_data_cleaner.py
index 7c457bb50..0199742b7 100644
--- a/bus_procurement_cost/tircp_data_cleaner.py
+++ b/bus_procurement_cost/tircp_data_cleaner.py
@@ -2,28 +2,18 @@
import pandas as pd
import shared_utils
from calitp_data_analysis.sql import to_snakecase
-
-from dgs_data_cleaner import new_prop_finder, new_bus_size_finder, project_type_checker
-
-
-def col_row_updater(df: pd.DataFrame, col1: str, val1, col2: str, new_val):
- """
- function used to update values at specificed columns and row value.
- """
- df.loc[df[col1] == val1, col2] = new_val
-
- return
+from bus_cost_utils import *
def clean_tircp_columns() -> pd.DataFrame:
"""
main function that reads in and cleans TIRCP data.
"""
- from fta_data_cleaner import gcs_path
- file_name = "TIRCP Tracking Sheets 2_1-10-2024.xlsx"
+
+ file_name = "raw_TIRCP Tracking Sheets 2_1-10-2024.xlsx"
tircp_name = "Project Tracking"
# read in data
- df = pd.read_excel(f"{gcs_path}{file_name}", sheet_name=tircp_name)
+ df = pd.read_excel(f"{GCS_PATH}{file_name}", sheet_name=tircp_name)
# keep specific columns
keep_col = [
@@ -93,12 +83,12 @@ def clean_tircp_columns() -> pd.DataFrame:
df4 = df3.assign(
prop_type = df3['project_description'].apply(new_prop_finder),
bus_size_type = df3['project_description'].apply(new_bus_size_finder),
- new_project_type = df3['project_description'].apply(project_type_checker)
+ new_project_type = df3['project_description'].apply(project_type_finder)
)
return df4
-def agg_buses_only(df: pd.DataFrame) -> pd.DataFrame:
+def tircp_agg_bus_only(df: pd.DataFrame) -> pd.DataFrame:
"""
filters df to only include projects with bus procurement and for project type = bus only
does not include engineering, planning or construction only projects.
@@ -126,14 +116,14 @@ def agg_buses_only(df: pd.DataFrame) -> pd.DataFrame:
if __name__ == "__main__":
- from fta_data_cleaner import gcs_path
+
# initial df
df1 = clean_tircp_columns()
# aggregate
- df2 = agg_buses_only(df1)
+ df2 = tircp_agg_bus_only(df1)
# export both df's as parquets to GCS
- df1.to_parquet(f'{gcs_path}clean_tircp_project.parquet')
- df2.to_parquet(f'{gcs_path}clean_tircp_project_bus_only.parquet')
\ No newline at end of file
+ df1.to_parquet(f'{GCS_PATH}clean_tircp_all_project.parquet')
+ df2.to_parquet(f'{GCS_PATH}clean_tircp_bus_only.parquet')
\ No newline at end of file