From eaaac58863e6c34533afdcf32637204455fce206 Mon Sep 17 00:00:00 2001 From: Shweta Adhikari Date: Thu, 18 Jan 2024 23:03:22 +0000 Subject: [PATCH 1/4] ntd_2021_2022_crosswalk --- Untitled.ipynb | 33 + ntd/ntd_id_changes_crosswalk.ipynb | 2349 ++++++++++++++++++++++++++++ 2 files changed, 2382 insertions(+) create mode 100644 Untitled.ipynb create mode 100644 ntd/ntd_id_changes_crosswalk.ipynb diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 000000000..063c20ec1 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,33 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "834539b7-de8e-47ca-bb5c-30bc55fa1e2a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ntd/ntd_id_changes_crosswalk.ipynb b/ntd/ntd_id_changes_crosswalk.ipynb new file mode 100644 index 000000000..33ec347e3 --- /dev/null +++ b/ntd/ntd_id_changes_crosswalk.ipynb @@ -0,0 +1,2349 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "207db9ce-8673-4dcd-be52-21ee4ed8d748", + "metadata": {}, + "source": [ + "# NTD 2021 vs 2022\n", + "\n", + "* Explore where / how much `ntd_id` has changed between 2021 and 2022 exports. 
\n", + "* Use BigQuery, from `mart_ntd` grab 2021 and 2022 and export as csv.\n", + "* Passing it through a variety of merges can help winnow down which ones we do need to manually reconcile.\n", + "* Go from more stringent merges (ids and names) to looser merges\n", + " * Parsing the `ntd_id` and grabbing the suffix portion can help, since there's a good batch where the `ntd_id` change in 2022 is that the prefix used in 2021 was dropped, where `ntd_id_2021 = [xxxx-ntd_id_2022]`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f1cd334d-29c7-4ea8-8530-1bde520627e6", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "#GCS_BUCKET = \"gs://calitp-ntd-data-products\"\n", + "#GCS_PATH = (f\"{GCS_BUCKET}annual-database-agency-information/\"\n", + "# \"dt=2023-11-15/ts=2023-11-15T22:29:51.925030+00:00/year=2022/\"\n", + "# \"annual-database-agency-information.jsonl.gz\"\n", + "# )\n", + "\n", + "LOCAL_PATH = \"ntd_2021_2022.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "85b4ef76-811d-499e-94c1-166c9cd4b0d5", + "metadata": {}, + "outputs": [], + "source": [ + "df_full = pd.read_csv(LOCAL_PATH)\n", + "\n", + "df_2021 = df_full[df_full.year==2021].reset_index(drop=True)\n", + "df_2022 = df_full[df_full.year==2022].reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "07b0ddbf-c3cb-421e-84e3-5f1a1c5c7b4b", + "metadata": {}, + "outputs": [], + "source": [ + "def basic_stats(df: pd.DataFrame): \n", + " cols = [\"ntd_id\", \"legacy_ntd_id\", \n", + " \"reported_by_name\", \n", + " \"agency_name\",\n", + " \"city\"\n", + " ]\n", + " for c in cols:\n", + " print(f\"nunique {c}: {df[c].nunique()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "efdc6f10-95e0-4383-a903-f7a45d39a009", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "nunique ntd_id: 3021\n", + "nunique legacy_ntd_id: 
2110\n", + "nunique reported_by_name: 64\n", + "nunique agency_name: 2929\n", + "nunique city: 1974\n" + ] + } + ], + "source": [ + "basic_stats(df_2021)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4946c23e-1f78-47ea-a800-7ef17fc0b088", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "nunique ntd_id: 2969\n", + "nunique legacy_ntd_id: 2092\n", + "nunique reported_by_name: 60\n", + "nunique agency_name: 2924\n", + "nunique city: 1964\n" + ] + } + ], + "source": [ + "basic_stats(df_2022)" + ] + }, + { + "cell_type": "markdown", + "id": "d4c30967-7586-4716-95fe-c9a8f838a98a", + "metadata": {}, + "source": [ + "## Full set of merge columns\n", + "\n", + "* `ntd_id, legacy_ntd_id, agency_name, reported_by_name, city`\n", + "\n", + "Probably the most complete set of identifiers" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "bdc41b2e-27be-45d2-9416-12067cf8607a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "left_only 1891\n", + "right_only 1887\n", + "both 1130\n", + "Name: _merge, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols = [\"ntd_id\", \"legacy_ntd_id\", \n", + " \"reported_by_name\", \n", + " \"agency_name\",\n", + " \"city\", \n", + " ]\n", + "\n", + "m1 = pd.merge(\n", + " df_2021[cols + [\"key\", \"year\"]],\n", + " df_2022[cols + [\"key\", \"year\"]],\n", + " on = cols,\n", + " how = \"outer\",\n", + " indicator = True\n", + ")\n", + "\n", + "m1._merge.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0e780b65-8e21-47c4-918f-c11d9433d1a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "left_only 0.385289\n", + "right_only 0.384474\n", + "both 0.230236\n", + "Name: _merge, dtype: float64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"m1._merge.value_counts(normalize=True)" + ] + }, + { + "cell_type": "markdown", + "id": "78610d5e-e499-4117-bf37-19d7b0e3f516", + "metadata": {}, + "source": [ + "### Majority will merge if we don't merge on `ntd_id`, but use `legacy_ntd_id` and variations of name instead\n", + "\n", + "These could be solved if we just use `agency_name` and `reported_by_name`. Even though `ntd_id` is not necessarily the same, `legacy_ntd_id` appears to be (even if same means it's NaN for both years).\n", + "\n", + "Exclude `city` from merge, since there are some that change cities, but it's the same agency. We do want to know if city changes from year to year." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "55f77342-1d83-463d-863a-ad5e2144ab57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ntd_idlegacy_ntd_idreported_by_nameagency_namecitykey_xyear_xkey_yyear_y_merge
3614702427R01-015Iowa Department of Transportation10-15 Regional Transit AgencyOttumwaNaNNaN49ba215d3793e5a58366b7dde1cb587e2022.0right_only
20027R01-702427R01-015Iowa Department of Transportation10-15 Regional Transit AgencyOttumwa4400da6a95a6a328ab9b7f651a1bdeb42021.0NaNNaNleft_only
963A0002-55329NaNStark Area Regional Transit AuthorityABCD, Inc.Canton463265c2d666621a7757630e5548fc3b2021.0NaNNaNleft_only
453655329NaNStark Area Regional Transit AuthorityABCD, Inc.CantonNaNNaN16b0e8ca1108b744db0f7952d8f6b3812022.0right_only
321188285NaNColorado Department of TransportationAEX - Alpine ExpressGunnisonNaNNaN00fc807db5fbd510c447e71e21069b192022.0right_only
.................................
21829R02-910709R02-019California Department of TransportationYosemite Area Regional Transportation SystemMercedf68cf9996e537ff2968339f4508dd60d2021.0NaNNaNleft_only
4269910709R02-019California Department of TransportationYosemite Area Regional Transportation SystemMercedNaNNaN1e8e914f03d459189a27bcdc2fc321062022.0right_only
442266320NaNTexas Department of TransportationZapata CountyZapataNaNNaNe19ba955b67fff1dc3fd906aad5ab8732022.0right_only
13906R05-66320NaNTexas Department of TransportationZapata CountyZapatadc5f81697aabb1d127a4c03225a6cdde2021.0NaNNaNleft_only
20256R04-66305NaNOklahoma Department of Transportationcity of MarlowMarlow05ff80e6366b6af2a59d99020f5f0d302021.0NaNNaNleft_only
\n", + "

3778 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " ntd_id legacy_ntd_id reported_by_name \\\n", + "3614 70242 7R01-015 Iowa Department of Transportation \n", + "2002 7R01-70242 7R01-015 Iowa Department of Transportation \n", + "963 A0002-55329 NaN Stark Area Regional Transit Authority \n", + "4536 55329 NaN Stark Area Regional Transit Authority \n", + "3211 88285 NaN Colorado Department of Transportation \n", + "... ... ... ... \n", + "2182 9R02-91070 9R02-019 California Department of Transportation \n", + "4269 91070 9R02-019 California Department of Transportation \n", + "4422 66320 NaN Texas Department of Transportation \n", + "1390 6R05-66320 NaN Texas Department of Transportation \n", + "2025 6R04-66305 NaN Oklahoma Department of Transportation \n", + "\n", + " agency_name city \\\n", + "3614 10-15 Regional Transit Agency Ottumwa \n", + "2002 10-15 Regional Transit Agency Ottumwa \n", + "963 ABCD, Inc. Canton \n", + "4536 ABCD, Inc. Canton \n", + "3211 AEX - Alpine Express Gunnison \n", + "... ... ... \n", + "2182 Yosemite Area Regional Transportation System Merced \n", + "4269 Yosemite Area Regional Transportation System Merced \n", + "4422 Zapata County Zapata \n", + "1390 Zapata County Zapata \n", + "2025 city of Marlow Marlow \n", + "\n", + " key_x year_x \\\n", + "3614 NaN NaN \n", + "2002 4400da6a95a6a328ab9b7f651a1bdeb4 2021.0 \n", + "963 463265c2d666621a7757630e5548fc3b 2021.0 \n", + "4536 NaN NaN \n", + "3211 NaN NaN \n", + "... ... ... \n", + "2182 f68cf9996e537ff2968339f4508dd60d 2021.0 \n", + "4269 NaN NaN \n", + "4422 NaN NaN \n", + "1390 dc5f81697aabb1d127a4c03225a6cdde 2021.0 \n", + "2025 05ff80e6366b6af2a59d99020f5f0d30 2021.0 \n", + "\n", + " key_y year_y _merge \n", + "3614 49ba215d3793e5a58366b7dde1cb587e 2022.0 right_only \n", + "2002 NaN NaN left_only \n", + "963 NaN NaN left_only \n", + "4536 16b0e8ca1108b744db0f7952d8f6b381 2022.0 right_only \n", + "3211 00fc807db5fbd510c447e71e21069b19 2022.0 right_only \n", + "... ... ... ... 
\n", + "2182 NaN NaN left_only \n", + "4269 1e8e914f03d459189a27bcdc2fc32106 2022.0 right_only \n", + "4422 e19ba955b67fff1dc3fd906aad5ab873 2022.0 right_only \n", + "1390 NaN NaN left_only \n", + "2025 NaN NaN left_only \n", + "\n", + "[3778 rows x 10 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m1[m1._merge != \"both\"].sort_values(\"agency_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6d3a3ac0-345d-4931-9e75-9220dada96ce", + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the keys that would now merge between the years\n", + "ok_keys = np.concatenate((\n", + " m1[m1._merge==\"both\"].key_x.unique(),\n", + " m1[m1._merge==\"both\"].key_y.unique()\n", + "))" + ] + }, + { + "cell_type": "markdown", + "id": "aefb7d2b-d1dc-4690-be39-622907dd9bb0", + "metadata": {}, + "source": [ + "### Merge on `legacy_ntd_id`, variations of name\n", + "These probably need to be manually addressed using a crosswalk, since we want to store variations of the `agency_name` over time." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "8129207f-dc83-4d2b-8a11-5238a0effed7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "both 1789\n", + "left_only 102\n", + "right_only 98\n", + "Name: _merge, dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Remove city from merge, since there are a couple\n", + "# that would merge but are set to diff cities\n", + "m2 = pd.merge(\n", + " df_2021[~df_2021.key.isin(ok_keys)][cols + [\"key\", \"year\"]],\n", + " df_2022[~df_2022.key.isin(ok_keys)][cols + [\"key\", \"year\"]],\n", + " on = [\"legacy_ntd_id\", \"reported_by_name\", \"agency_name\"],\n", + " how = \"outer\",\n", + " indicator = True\n", + ")\n", + "\n", + "m2._merge.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "c1db5321-c1b4-41f4-a024-d6c8deb975e7", + "metadata": {}, + "source": [ + "Some of these are clearly the same agency when you spot check them (abbreviations, minor changes in name, etc.), but some are less obvious. We might have to start compiling a larger crosswalk of variations on agency name.\n", + "\n", + "These would be grouped together (`ntd_id` changes...but only by a prefix: `5R02-50468` in 2021 vs. `50468` in 2022)\n", + "* Whitley County Commissioners\n", + "* Whitley County Council on Aging" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a1288ff4-ce8d-4dea-b192-0846edd0482d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ntd_id_xlegacy_ntd_idreported_by_nameagency_namecity_xkey_xyear_xntd_id_ycity_ykey_yyear_y_merge
13426R02-66284NaNLouisiana Department of TransportationAcadia COACrowley5127e6c6b2c3c3018cc2629cfa94f74e2021.0NaNNaNNaNNaNleft_only
1692A0004-00415NaNValley Regional TransitAda County Highway DistrictBoise8b1ef6431abe5e9a5eb5c0a1124477d92021.0NaNNaNNaNNaNleft_only
1928NaN5R02-020Indiana Department of TransportationArea 10 Council on Aging of Monroe CountyNaNNaNNaN50308Ellettsville00d4f9e499a70f66a8a72c0f4c42d4082022.0right_only
1971NaN5R02-017Indiana Department of TransportationArea IV Agency on Aging and Community Action P...NaNNaNNaN50365Lafayettea561a8b8df6a37f2dd4f141e4902d3462022.0right_only
1937NaNNaNArizona Department of TransportationAssist to IndependenceNaNNaNNaN99466Tuba City42b9d82b9be91d4b150f55f9e88a809f2022.0right_only
.......................................
15775R02-504685R02-039Indiana Department of TransportationWhitley County CommissionersColumbia City518ace07cb87072ec541dc7fef24e6f72021.0NaNNaNNaNNaNleft_only
1973NaN5R02-039Indiana Department of TransportationWhitley County Council on AgingNaNNaNNaN50468Columbia City0b058ae9f5d5a1e9780257c981a6b91c2022.0right_only
16866R02-66299NaNLouisiana Department of TransportationWinn COAWinnfield57e123ed53fdf3ce74769f216cc2c39d2021.0NaNNaNNaNNaNleft_only
1933NaN5R02-024Indiana Department of TransportationYMCA of VincennesNaNNaNNaN50392Vincennes13d170d217ad0cf7860e62ce136510042022.0right_only
12486R04-66305NaNOklahoma Department of Transportationcity of MarlowMarlow05ff80e6366b6af2a59d99020f5f0d302021.0NaNNaNNaNNaNleft_only
\n", + "

200 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " ntd_id_x legacy_ntd_id reported_by_name \\\n", + "1342 6R02-66284 NaN Louisiana Department of Transportation \n", + "1692 A0004-00415 NaN Valley Regional Transit \n", + "1928 NaN 5R02-020 Indiana Department of Transportation \n", + "1971 NaN 5R02-017 Indiana Department of Transportation \n", + "1937 NaN NaN Arizona Department of Transportation \n", + "... ... ... ... \n", + "1577 5R02-50468 5R02-039 Indiana Department of Transportation \n", + "1973 NaN 5R02-039 Indiana Department of Transportation \n", + "1686 6R02-66299 NaN Louisiana Department of Transportation \n", + "1933 NaN 5R02-024 Indiana Department of Transportation \n", + "1248 6R04-66305 NaN Oklahoma Department of Transportation \n", + "\n", + " agency_name city_x \\\n", + "1342 Acadia COA Crowley \n", + "1692 Ada County Highway District Boise \n", + "1928 Area 10 Council on Aging of Monroe County NaN \n", + "1971 Area IV Agency on Aging and Community Action P... NaN \n", + "1937 Assist to Independence NaN \n", + "... ... ... \n", + "1577 Whitley County Commissioners Columbia City \n", + "1973 Whitley County Council on Aging NaN \n", + "1686 Winn COA Winnfield \n", + "1933 YMCA of Vincennes NaN \n", + "1248 city of Marlow Marlow \n", + "\n", + " key_x year_x ntd_id_y city_y \\\n", + "1342 5127e6c6b2c3c3018cc2629cfa94f74e 2021.0 NaN NaN \n", + "1692 8b1ef6431abe5e9a5eb5c0a1124477d9 2021.0 NaN NaN \n", + "1928 NaN NaN 50308 Ellettsville \n", + "1971 NaN NaN 50365 Lafayette \n", + "1937 NaN NaN 99466 Tuba City \n", + "... ... ... ... ... 
\n", + "1577 518ace07cb87072ec541dc7fef24e6f7 2021.0 NaN NaN \n", + "1973 NaN NaN 50468 Columbia City \n", + "1686 57e123ed53fdf3ce74769f216cc2c39d 2021.0 NaN NaN \n", + "1933 NaN NaN 50392 Vincennes \n", + "1248 05ff80e6366b6af2a59d99020f5f0d30 2021.0 NaN NaN \n", + "\n", + " key_y year_y _merge \n", + "1342 NaN NaN left_only \n", + "1692 NaN NaN left_only \n", + "1928 00d4f9e499a70f66a8a72c0f4c42d408 2022.0 right_only \n", + "1971 a561a8b8df6a37f2dd4f141e4902d346 2022.0 right_only \n", + "1937 42b9d82b9be91d4b150f55f9e88a809f 2022.0 right_only \n", + "... ... ... ... \n", + "1577 NaN NaN left_only \n", + "1973 0b058ae9f5d5a1e9780257c981a6b91c 2022.0 right_only \n", + "1686 NaN NaN left_only \n", + "1933 13d170d217ad0cf7860e62ce13651004 2022.0 right_only \n", + "1248 NaN NaN left_only \n", + "\n", + "[200 rows x 12 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m2[m2._merge != \"both\"].sort_values([\"agency_name\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c3e6dd34-1eea-421e-819a-c7b69431352a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Acadia COA', 'Ada County Highway District',\n", + " 'Area 10 Council on Aging of Monroe County',\n", + " 'Area IV Agency on Aging and Community Action Programs ',\n", + " 'Assist to Independence', 'Atlanta-Region Transit Link Authority',\n", + " 'Autonomous Municipality of Vega Alta', 'Bacon County',\n", + " 'Baltimore County Department of Aging',\n", + " 'Baltimore County Department of Public Works Transportation',\n", + " 'Bay State Cruise Company', 'Bay State LLC', 'Berrien County',\n", + " 'Blue River Services ', 'Boone County Commissioners',\n", + " 'Boone County Senior Services', 'Boulder, City of',\n", + " 'Brantley County', 'Brooks County Transit',\n", + " 'Brown County Senior Citizens Council', 'Brown County YMCA',\n", + " 'Buckeye Community Services', 'CENTRAL MISSISSIPPI INC',\n", + 
" 'CHANDLER, CITY OF', 'Cache Employment & Training Center (CETC)',\n", + " 'Calcasieu Voluntary Council in Aging', 'Cardinal Services ',\n", + " 'Cass County Commissioners', 'Cass County Council on Aging ',\n", + " 'Central City ', 'Central Mississippi, Incorporated',\n", + " 'City of Benkelman', 'City of Buchanan', 'City of Burlington',\n", + " 'City of Casa Grande', 'City of Dinuba', 'City of Elk Grove',\n", + " 'City of Modesto ', 'City of Tulare', 'City of Woodlake',\n", + " 'Clay Senior & Community Services, Inc.',\n", + " 'Clinton County Commissioners',\n", + " 'Commission On Aging Family Services Inc., The',\n", + " 'Common Ground Outdoor Adventures',\n", + " 'Concordia Senior Citizen Center',\n", + " 'Confederated Tribes of Warm Springs', 'Cook County Transit',\n", + " 'Coquille Indian Tribe', 'Council of Senior Tyler Countians Inc',\n", + " 'Coweta County', 'DATTCO, Inc. ', 'DeKalb County Commissioners',\n", + " 'DeKalb County Council on Aging ', 'Donaldsonville ARC',\n", + " 'Dynamic Dimensions, Inc.', 'Easterseals Blake Foundation',\n", + " 'Elko Band Council', 'Enterprise Holdings, LLC',\n", + " 'Enterprise Rideshare - Michigan',\n", + " 'Fayette Community Commissioners',\n", + " 'Fayette County Council Agency on Aging and Aged',\n", + " 'Four Rivers Resource Services', 'Franklin County Commissioners',\n", + " 'Franklin County Senior Services ', 'Friends of Hansford Center',\n", + " 'Fulton County Commissioners', 'Fulton County Council on Aging ',\n", + " 'GOODYEAR, CITY OF',\n", + " 'Greater Cincinnati Behavioral Health Services ',\n", + " 'Greenbrier Co. 
Committee on Aging, Inc.', 'Greyhound Lines, Inc.',\n", + " 'Hamilton County', 'Hancock County, Indiana',\n", + " 'Hardy County Committee on Aging Inc',\n", + " 'Harrison County Commissioners',\n", + " 'Helping Ourselves Pursue Enrichment Incorporated',\n", + " 'High Valley Transit District', 'Historic Hoosier Hills',\n", + " 'Huntington County Commissioners',\n", + " 'Huntington County Council on Aging ',\n", + " 'Indianapolis and Marion County Public Transportation',\n", + " 'Isanti County', 'Jalbert Leasing, Inc. ',\n", + " 'Jay County Commissioners', 'Jefferson Bus Lines', 'Jefferson COA',\n", + " 'Kane County Senior Citizens Improvement Corp',\n", + " 'Knox County Commissioners', 'Kosciusko County Commissioners',\n", + " 'LIFT COMMUNITY ACTION AGENCY INC LIFT',\n", + " 'LaGrange County Commissioners',\n", + " 'LaGrange County Council on Aging ', 'Lafayette COA',\n", + " 'LifeStream Services ', 'LifeTime Resources ',\n", + " 'Little Dixie Community Action Agency, Inc.',\n", + " 'Living Wells in Wabash County Council on Aging ',\n", + " 'Lowndes County', 'MESA, CITY OF', 'Marshall County Commissioners',\n", + " 'Marshall County Council on Aging ', 'Miami County Commissioners',\n", + " 'Miami County YMCA ', 'Michigan Department of Transportation',\n", + " 'Monroe County Commissioners', 'Monroe County Council on Aging',\n", + " 'Municipality of Anasco', 'Municipality of Añasco',\n", + " 'Municipality of Comerio', 'NEW BOURBON REGIONAL PORT AUTH',\n", + " 'Native Village of Crooked Creek',\n", + " 'Neighbor Network/Pleasants County Senior Services',\n", + " 'New Bourbon Regional Port Authority',\n", + " 'New Horizons Disability Empowerment Center',\n", + " 'Noble County Commissioners', 'Noble County Council on Aging ',\n", + " 'Northeast Transportation Services', 'Orange County Commissioners',\n", + " 'Orange County First Chance Center ', 'Ouachita ARC',\n", + " 'Paul Philippe Resource Center ', 'Philadelphia Transit',\n", + " 'Pierce County Transit',\n", + " 
'Pioneer Valley Transit Authority Intercity Bus',\n", + " 'Piute County Senior Citizen Center',\n", + " 'Plaquemines Parish Government',\n", + " 'Plaquemines Port Harbor Terminal District',\n", + " 'Playa Vista Parks and Landscape Corp.',\n", + " 'Port of Administration Authority',\n", + " 'Puerto Rico Highway and Transportation Authority - Público',\n", + " 'Puerto Rico Highway and Transportation Authority – Público',\n", + " 'Raleigh County Commission on Aging',\n", + " 'Regional Transportation Authority of Pima County',\n", + " 'Ritchie County Integrated Family Services',\n", + " 'Rush County Commissioners', 'Rush County Senior Services ',\n", + " 'Saguaro Foundation Community Living Programs',\n", + " 'Salt Lake Express', 'Sevier County ', 'Shelby Senior Services ',\n", + " 'Shoshoni Senior Citizens', 'Shoshoni Senior Citizens Center',\n", + " 'Silver Key Senior Services, Inc', 'Sistersville Ferry ',\n", + " 'Southern Georgia Regional Commission',\n", + " 'Southern Indiana Development Commission', 'Sphere, Inc',\n", + " 'Stanislaus County ', 'Stanislaus Regional Transit Authority',\n", + " 'Steuben County Commissioners', 'Steuben County Council on Aging',\n", + " 'Sunrise Northeast, Inc.', 'Taos Pueblo', 'The Shuttle, Inc.',\n", + " 'Tift Transit System', 'Tippecanoe County Commissioners',\n", + " 'Town of Avon (CT)', 'Town of Beacon Falls', 'Town of Canton',\n", + " 'Town of Florence', 'Town of Limon', 'Town of Litchfield',\n", + " 'Town of Mansfield', 'Town of Montville', 'Town of Morrisville',\n", + " 'Town of Oro Valley ', 'Town of Prescott Valley',\n", + " 'Town of Roxbury', 'Town of Sprague', 'Town of Wallkill',\n", + " 'Town of springerville AZ',\n", + " 'Transit Management of Central Maryland, Inc.',\n", + " 'Tucker County Senior Citizens',\n", + " 'Tulare County Regional Transit Agency', 'Turner County ',\n", + " 'Uintah Healthcare Special Service District',\n", + " 'Union County Commissioners', 'Union County Council on Aging ',\n", + " 'Verde Valley 
Caregivers Coalition',\n", + " 'Vermont Agency of Transportation', 'Wabash County Commissioners',\n", + " 'Ware County',\n", + " 'Washington County (on behalf of Council On Aging)',\n", + " 'Wells County Commissioners', 'Wells County Council on Aging ',\n", + " 'Wet Mountain Valley Rotary Community Service Inc.',\n", + " 'Wet Mountain Valley Rotary Community Service, Inc.',\n", + " 'White County Commissioners', 'White County Council on Aging ',\n", + " 'Whitley County Commissioners', 'Whitley County Council on Aging ',\n", + " 'Winn COA', 'YMCA of Vincennes ', 'city of Marlow'], dtype=object)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m2[m2._merge != \"both\"].sort_values([\"agency_name\"]).agency_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "574bdf2c-ec56-44c9-932f-49f7132c65e5", + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the keys that would now merge between the years\n", + "ok_keys2 = np.concatenate((\n", + " ok_keys,\n", + " m2[m2._merge == \"both\"].key_x.unique(),\n", + " m2[m2._merge == \"both\"].key_y.unique()\n", + "))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c455cd0e-3722-40f5-b721-4aa1c3f48dab", + "metadata": {}, + "outputs": [], + "source": [ + "def ntd_id_parsed(df: pd.DataFrame):\n", + " df = df.assign(\n", + " ntd_id_no_prefix = df.apply(\n", + " lambda x:\n", + " x.ntd_id.split(\"-\")[1] if \"-\" in x.ntd_id\n", + " else x.ntd_id, \n", + " axis=1)\n", + " )\n", + " \n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "6f310097-f6c0-4a55-8231-ddf7eb9b69fe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "left_only 61\n", + "right_only 54\n", + "both 41\n", + "Name: _merge, dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m3 = pd.merge(\n", + " 
df_2021[~df_2021.key.isin(ok_keys2)][cols + [\"key\", \"year\"]].pipe(ntd_id_parsed),\n", + " df_2022[~df_2022.key.isin(ok_keys2)][cols + [\"key\", \"year\"]].pipe(ntd_id_parsed),\n", + " on = [\"ntd_id_no_prefix\", \"legacy_ntd_id\",],\n", + " how = \"outer\",\n", + " indicator = True\n", + ")\n", + "\n", + "m3._merge.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "fa780a5a-81d0-4953-b7ef-b83312fedd75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ntd_id_xlegacy_ntd_idreported_by_name_xagency_name_xcity_xkey_xyear_xntd_id_no_prefixntd_id_yreported_by_name_yagency_name_ycity_ykey_yyear_y_merge
05R02-503995R02-004Indiana Department of TransportationFayette Community CommissionersConnersvilleeeae713c46c405e3c8d4f001df04ab082021.05039950399Indiana Department of TransportationFayette County Council Agency on Aging and AgedConnersville4e3cd53439c5b16a8db255353ab5f4522022.0both
11670T19NaNNative Village of Crooked CreekCrooked Creek5f174c276fcb5bdafe25d2d81bc90dc92021.0167NaNNaNNaNNaNNaNNaNleft_only
\n", + "
" + ], + "text/plain": [ + " ntd_id_x legacy_ntd_id reported_by_name_x \\\n", + "0 5R02-50399 5R02-004 Indiana Department of Transportation \n", + "1 167 0T19 NaN \n", + "\n", + " agency_name_x city_x \\\n", + "0 Fayette Community Commissioners Connersville \n", + "1 Native Village of Crooked Creek Crooked Creek \n", + "\n", + " key_x year_x ntd_id_no_prefix ntd_id_y \\\n", + "0 eeae713c46c405e3c8d4f001df04ab08 2021.0 50399 50399 \n", + "1 5f174c276fcb5bdafe25d2d81bc90dc9 2021.0 167 NaN \n", + "\n", + " reported_by_name_y \\\n", + "0 Indiana Department of Transportation \n", + "1 NaN \n", + "\n", + " agency_name_y city_y \\\n", + "0 Fayette County Council Agency on Aging and Aged Connersville \n", + "1 NaN NaN \n", + "\n", + " key_y year_y _merge \n", + "0 4e3cd53439c5b16a8db255353ab5f452 2022.0 both \n", + "1 NaN NaN left_only " + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m3.head(2)" + ] + }, + { + "cell_type": "markdown", + "id": "ca15336a-42d3-4d8e-94c1-9142bd4f5394", + "metadata": { + "tags": [] + }, + "source": [ + "### Parsing `ntd_id` into a no-prefix version can help \n", + "\n", + "If we are going to remove the prefix...we could do this earlier and hopefully get more to merge. Although, this does mean that we are left with variations on `agency_name` and `reported_by_name`, which still needs to make it into our crosswalk, even if we do not use it to merge.\n", + "\n", + "Left with a batch of about 60 to reconcile manually." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "92906541-2a8f-46aa-aae0-7bc60d892fe1", + "metadata": {}, + "outputs": [], + "source": [ + "m1_table = m1.loc[m1['_merge'] == \"both\", ['ntd_id', 'reported_by_name','legacy_ntd_id','agency_name','city']]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "b18cdc3d-cedf-43c4-bbf5-e084e24d5a3b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ntd_idreported_by_namelegacy_ntd_idagency_namecity
111239NaNNaNHyannis Harbor Tours, Inc.Hyannis
221NaN21Whatcom Transportation AuthorityBellingham
630010NaN3010Lehigh and Northampton Transportation AuthorityAllentown
730041NaN3041Board of Commissioners of Allegany County Mary...Cumberland
830201NaNNaNCity of BaltimoreBaltimore
..................
299950020NaN5020City of Springfield, OhioSpringfield
300050047NaN5047Bloomington-Normal Public Transit SystemNormal
300766194NaN6T14Cheyenne & Arapaho TribesConcho
300970032NaN7032City of St. Joseph, MissouriSaint Joseph
30169R03NaN9R03Hawaii Department of TransportationHonolulu
\n", + "

1130 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " ntd_id reported_by_name legacy_ntd_id \\\n", + "1 11239 NaN NaN \n", + "2 21 NaN 21 \n", + "6 30010 NaN 3010 \n", + "7 30041 NaN 3041 \n", + "8 30201 NaN NaN \n", + "... ... ... ... \n", + "2999 50020 NaN 5020 \n", + "3000 50047 NaN 5047 \n", + "3007 66194 NaN 6T14 \n", + "3009 70032 NaN 7032 \n", + "3016 9R03 NaN 9R03 \n", + "\n", + " agency_name city \n", + "1 Hyannis Harbor Tours, Inc. Hyannis \n", + "2 Whatcom Transportation Authority Bellingham \n", + "6 Lehigh and Northampton Transportation Authority Allentown \n", + "7 Board of Commissioners of Allegany County Mary... Cumberland \n", + "8 City of Baltimore Baltimore \n", + "... ... ... \n", + "2999 City of Springfield, Ohio Springfield \n", + "3000 Bloomington-Normal Public Transit System Normal \n", + "3007 Cheyenne & Arapaho Tribes Concho \n", + "3009 City of St. Joseph, Missouri Saint Joseph \n", + "3016 Hawaii Department of Transportation Honolulu \n", + "\n", + "[1130 rows x 5 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m1_table" + ] + }, + { + "cell_type": "markdown", + "id": "cd6e7fa6-1e25-4d61-bdd3-fcd932b3a623", + "metadata": {}, + "source": [ + "### Merging all the crosswalks to create one table" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "74bd172e-2585-4e9b-9bde-c3de85ef05e1", + "metadata": {}, + "outputs": [], + "source": [ + "m1_table['ntd_id_2021'] = m1_table['ntd_id']" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "7c82c4cc-d32f-4803-a487-f8cda9f41db0", + "metadata": {}, + "outputs": [], + "source": [ + "m1_table.rename(columns={'ntd_id' : 'ntd_id_2022'}, inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "9962b97f-8254-422d-a7aa-38e9f22cb391", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ntd_id_2022reported_by_namelegacy_ntd_idagency_namecityntd_id_2021
111239NaNNaNHyannis Harbor Tours, Inc.Hyannis11239
221NaN21Whatcom Transportation AuthorityBellingham21
630010NaN3010Lehigh and Northampton Transportation AuthorityAllentown30010
730041NaN3041Board of Commissioners of Allegany County Mary...Cumberland30041
830201NaNNaNCity of BaltimoreBaltimore30201
.....................
299950020NaN5020City of Springfield, OhioSpringfield50020
300050047NaN5047Bloomington-Normal Public Transit SystemNormal50047
300766194NaN6T14Cheyenne & Arapaho TribesConcho66194
300970032NaN7032City of St. Joseph, MissouriSaint Joseph70032
30169R03NaN9R03Hawaii Department of TransportationHonolulu9R03
\n", + "

1130 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " ntd_id_2022 reported_by_name legacy_ntd_id \\\n", + "1 11239 NaN NaN \n", + "2 21 NaN 21 \n", + "6 30010 NaN 3010 \n", + "7 30041 NaN 3041 \n", + "8 30201 NaN NaN \n", + "... ... ... ... \n", + "2999 50020 NaN 5020 \n", + "3000 50047 NaN 5047 \n", + "3007 66194 NaN 6T14 \n", + "3009 70032 NaN 7032 \n", + "3016 9R03 NaN 9R03 \n", + "\n", + " agency_name city \\\n", + "1 Hyannis Harbor Tours, Inc. Hyannis \n", + "2 Whatcom Transportation Authority Bellingham \n", + "6 Lehigh and Northampton Transportation Authority Allentown \n", + "7 Board of Commissioners of Allegany County Mary... Cumberland \n", + "8 City of Baltimore Baltimore \n", + "... ... ... \n", + "2999 City of Springfield, Ohio Springfield \n", + "3000 Bloomington-Normal Public Transit System Normal \n", + "3007 Cheyenne & Arapaho Tribes Concho \n", + "3009 City of St. Joseph, Missouri Saint Joseph \n", + "3016 Hawaii Department of Transportation Honolulu \n", + "\n", + " ntd_id_2021 \n", + "1 11239 \n", + "2 21 \n", + "6 30010 \n", + "7 30041 \n", + "8 30201 \n", + "... ... 
\n", + "2999 50020 \n", + "3000 50047 \n", + "3007 66194 \n", + "3009 70032 \n", + "3016 9R03 \n", + "\n", + "[1130 rows x 6 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m1_table" + ] + }, + { + "cell_type": "markdown", + "id": "51922a0e-d710-40b9-b6f3-f4350c89e2af", + "metadata": {}, + "source": [ + "Filtering rows with successful crosswalk and selecting few columns to concatenate" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "7d30483d-47e9-480b-98c4-50e6143db933", + "metadata": {}, + "outputs": [], + "source": [ + "m2_table = m2.loc[m2['_merge'] == \"both\", ['ntd_id_x', 'reported_by_name','legacy_ntd_id','agency_name','city_y','ntd_id_y']]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "cf74a45d-d6cb-4c65-a2fc-ccab5a218b0a", + "metadata": {}, + "outputs": [], + "source": [ + "m2_table.rename(columns={'ntd_id_y' : 'ntd_id_2022', 'ntd_id_x' : 'ntd_id_2021', 'city_y' : 'city'}, inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "e28ec20c-2c54-4597-80bd-f2809f6bde0c", + "metadata": {}, + "outputs": [], + "source": [ + "col_order = ['ntd_id_2022', 'reported_by_name','legacy_ntd_id','agency_name','city', 'ntd_id_2021']\n", + "m2_table = m2_table[col_order]" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "7e4cc94d-37aa-4dee-ab80-fbb7e29ae68c", + "metadata": {}, + "outputs": [], + "source": [ + "m3_table = m3.loc[m3['_merge'] == \"both\", ['ntd_id_x', 'reported_by_name_y','legacy_ntd_id','agency_name_y','city_y','ntd_id_y']]" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "e8633710-afc4-4b82-a93a-edddf3c97ef5", + "metadata": {}, + "outputs": [], + "source": [ + "m3_table.rename(columns={'ntd_id_y' : 'ntd_id_2022', 'ntd_id_x' : 'ntd_id_2021', 'city_y' : 'city', 'reported_by_name_y' : 'reported_by_name', 'agency_name_y' : 'agency_name' }, inplace = True)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 60, + "id": "de589814-3e55-459a-81e8-e4403960e788", + "metadata": {}, + "outputs": [], + "source": [ + "m3_table = m3_table[col_order]" + ] + }, + { + "cell_type": "markdown", + "id": "f823a67d-496a-4fed-b641-f5ef57da068a", + "metadata": {}, + "source": [ + "Creating the concatenated table " + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "4001a4eb-44c6-4155-bd4c-51b5042c10b6", + "metadata": {}, + "outputs": [], + "source": [ + "crosswalk = pd.concat([m1_table,m2_table,m3_table], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "09c4ca75-7432-47ad-bac8-f7967f083f20", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ntd_id_2022reported_by_namelegacy_ntd_idagency_namecityntd_id_2021
111239NaNNaNHyannis Harbor Tours, Inc.Hyannis11239
221NaN21Whatcom Transportation AuthorityBellingham21
630010NaN3010Lehigh and Northampton Transportation AuthorityAllentown30010
730041NaN3041Board of Commissioners of Allegany County Mary...Cumberland30041
830201NaNNaNCity of BaltimoreBaltimore30201
.....................
8450468Indiana Department of Transportation5R02-039Whitley County Council on AgingColumbia City5R02-50468
8750483Indiana Department of Transportation5R02-005Franklin County Senior ServicesBrookville5R02-50483
9450308Indiana Department of Transportation5R02-020Area 10 Council on Aging of Monroe CountyEllettsville5R02-50308
9650276Indiana Department of Transportation5R02-032Four Rivers Resource ServicesLoogootee5R02-50276
10150230Indiana Department of Transportation5R02-011Boone County Senior ServicesLebanon5R02-50230
\n", + "

2960 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " ntd_id_2022 reported_by_name legacy_ntd_id \\\n", + "1 11239 NaN NaN \n", + "2 21 NaN 21 \n", + "6 30010 NaN 3010 \n", + "7 30041 NaN 3041 \n", + "8 30201 NaN NaN \n", + ".. ... ... ... \n", + "84 50468 Indiana Department of Transportation 5R02-039 \n", + "87 50483 Indiana Department of Transportation 5R02-005 \n", + "94 50308 Indiana Department of Transportation 5R02-020 \n", + "96 50276 Indiana Department of Transportation 5R02-032 \n", + "101 50230 Indiana Department of Transportation 5R02-011 \n", + "\n", + " agency_name city \\\n", + "1 Hyannis Harbor Tours, Inc. Hyannis \n", + "2 Whatcom Transportation Authority Bellingham \n", + "6 Lehigh and Northampton Transportation Authority Allentown \n", + "7 Board of Commissioners of Allegany County Mary... Cumberland \n", + "8 City of Baltimore Baltimore \n", + ".. ... ... \n", + "84 Whitley County Council on Aging Columbia City \n", + "87 Franklin County Senior Services Brookville \n", + "94 Area 10 Council on Aging of Monroe County Ellettsville \n", + "96 Four Rivers Resource Services Loogootee \n", + "101 Boone County Senior Services Lebanon \n", + "\n", + " ntd_id_2021 \n", + "1 11239 \n", + "2 21 \n", + "6 30010 \n", + "7 30041 \n", + "8 30201 \n", + ".. ... 
\n", + "84 5R02-50468 \n", + "87 5R02-50483 \n", + "94 5R02-50308 \n", + "96 5R02-50276 \n", + "101 5R02-50230 \n", + "\n", + "[2960 rows x 6 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "crosswalk" + ] + }, + { + "cell_type": "markdown", + "id": "889084ff-cf7b-4776-924a-1a5b8f2ad3b3", + "metadata": {}, + "source": [ + "#### Checking for duplicated rows" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "62dd8c51-e1a5-4ada-8e48-3b5245500895", + "metadata": {}, + "outputs": [], + "source": [ + "duplicated_rows = crosswalk.duplicated()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "9d5cea88-0a30-49b7-b5a4-c1fa796c886f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "duplicated_rows:\n", + "Empty DataFrame\n", + "Columns: [ntd_id_2022, reported_by_name, legacy_ntd_id, agency_name, city, ntd_id_2021]\n", + "Index: []\n" + ] + } + ], + "source": [ + "print(\"duplicated_rows:\")\n", + "print(crosswalk[duplicated_rows])" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "49cfc12a-9cca-4639-91cc-2107286634b5", + "metadata": {}, + "outputs": [], + "source": [ + "duplicated_rows_subset = crosswalk.duplicated(subset = ['ntd_id_2022', 'agency_name', 'reported_by_name'])" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "9bb49836-cbf2-446a-88c2-a4368b907789", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "duplicated_rows_subset:\n", + "Empty DataFrame\n", + "Columns: [ntd_id_2022, reported_by_name, legacy_ntd_id, agency_name, city, ntd_id_2021]\n", + "Index: []\n" + ] + } + ], + "source": [ + "print(\"duplicated_rows_subset:\")\n", + "print(crosswalk[duplicated_rows_subset])" + ] + }, + { + "cell_type": "markdown", + "id": "0ab10ebf-297e-4811-a1e1-51e6c25ab4bc", + "metadata": {}, + "source": [ + "#### Manual crosswalk 
for the remaining ntd_ids\n" + ] + }, + { + "cell_type": "markdown", + "id": "bbd7d5f1-0321-4f18-b886-8debda0b01e9", + "metadata": {}, + "source": [ + "- Pioneer valley transit authority intercity bus reported by Massachusetts Department of Transportation has incorrect ntd_id (checked from the NTD profile data)\n", + "- Some agencies do not have 2021 data but has 2020 data. \n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "334fba5f-30ea-48aa-88cf-a728c517810d", + "metadata": {}, + "outputs": [], + "source": [ + "LOCAL_PATHa = \"manual_crosswalk.csv\"\n", + "m4_table = pd.read_csv(LOCAL_PATHa)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "89d9788b-95f2-46ec-a458-1b7c6a18c97b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ntd_id_2022reported_by_namelegacy_ntd_idagency_namecityntd_id_2021Remarks
0231NaN0T33Confederated Tribes of Warm SpringsWarm SpringsNaNNo 2021 data
190306NaNNaNStanislaus Regional Transit AuthorityModestoNaNthe county doesn’t have data after 2021 so cou...
\n", + "
" + ], + "text/plain": [ + " ntd_id_2022 reported_by_name legacy_ntd_id \\\n", + "0 231 NaN 0T33 \n", + "1 90306 NaN NaN \n", + "\n", + " agency_name city ntd_id_2021 \\\n", + "0 Confederated Tribes of Warm Springs Warm Springs NaN \n", + "1 Stanislaus Regional Transit Authority Modesto NaN \n", + "\n", + " Remarks \n", + "0 No 2021 data \n", + "1 the county doesn’t have data after 2021 so cou... " + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m4_table.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "c39ad008-3c93-48e2-9f79-06ea57cc8a42", + "metadata": {}, + "outputs": [], + "source": [ + "crosswalk['Remarks'] = pd.Series(dtype=float)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "444199a9-4ee5-4dc4-9078-372bc073f8bf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ntd_id_2022reported_by_namelegacy_ntd_idagency_namecityntd_id_2021Remarks
111239NaNNaNHyannis Harbor Tours, Inc.Hyannis11239NaN
221NaN21Whatcom Transportation AuthorityBellingham21NaN
\n", + "
" + ], + "text/plain": [ + " ntd_id_2022 reported_by_name legacy_ntd_id \\\n", + "1 11239 NaN NaN \n", + "2 21 NaN 21 \n", + "\n", + " agency_name city ntd_id_2021 Remarks \n", + "1 Hyannis Harbor Tours, Inc. Hyannis 11239 NaN \n", + "2 Whatcom Transportation Authority Bellingham 21 NaN " + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "crosswalk.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "58fcd1e0-0940-4456-9ef8-eab0be5f9d9c", + "metadata": {}, + "outputs": [], + "source": [ + "final_crosswalk = pd.concat([crosswalk, m4_table], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "67689071-abb7-4225-8c83-fac5ea275960", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ntd_id_2022reported_by_namelegacy_ntd_idagency_namecityntd_id_2021Remarksntd_id_2022Remarks
111239NaNNaNHyannis Harbor Tours, Inc.Hyannis11239NaNNaNNaN
221NaN21Whatcom Transportation AuthorityBellingham21NaNNaNNaN
630010NaN3010Lehigh and Northampton Transportation AuthorityAllentown30010NaNNaNNaN
730041NaN3041Board of Commissioners of Allegany County Mary...Cumberland30041NaNNaNNaN
830201NaNNaNCity of BaltimoreBaltimore30201NaNNaNNaN
..............................
47NaNArizona Department of TransportationNaNNew Horizons Disability Empowerment CenterPrescott ValleyNaNNaN99463.0NaN
48NaNUtah Department of TransportationNaNKane County Senior Citizens Improvement CorpKanabNaNNaN88253.0NaN
49NaNArizona Department of TransportationNaNSaguaro Foundation Community Living ProgramsYumaNaNNaN99462.0NaN
50NaNArizona Department of TransportationNaNTown of springerville AZSpringervilleNaNNaN99467.0NaN
51NaNWest Virginia Department of TransportationNaNGreenbrier Co. Committee on Aging, Inc.RupertNaNNaN31048.0NaN
\n", + "

3012 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " ntd_id_2022 reported_by_name legacy_ntd_id \\\n", + "1 11239 NaN NaN \n", + "2 21 NaN 21 \n", + "6 30010 NaN 3010 \n", + "7 30041 NaN 3041 \n", + "8 30201 NaN NaN \n", + ".. ... ... ... \n", + "47 NaN Arizona Department of Transportation NaN \n", + "48 NaN Utah Department of Transportation NaN \n", + "49 NaN Arizona Department of Transportation NaN \n", + "50 NaN Arizona Department of Transportation NaN \n", + "51 NaN West Virginia Department of Transportation NaN \n", + "\n", + " agency_name city \\\n", + "1 Hyannis Harbor Tours, Inc. Hyannis \n", + "2 Whatcom Transportation Authority Bellingham \n", + "6 Lehigh and Northampton Transportation Authority Allentown \n", + "7 Board of Commissioners of Allegany County Mary... Cumberland \n", + "8 City of Baltimore Baltimore \n", + ".. ... ... \n", + "47 New Horizons Disability Empowerment Center Prescott Valley \n", + "48 Kane County Senior Citizens Improvement Corp Kanab \n", + "49 Saguaro Foundation Community Living Programs Yuma \n", + "50 Town of springerville AZ Springerville \n", + "51 Greenbrier Co. Committee on Aging, Inc. Rupert \n", + "\n", + " ntd_id_2021 Remarks ntd_id_2022 Remarks \n", + "1 11239 NaN NaN NaN \n", + "2 21 NaN NaN NaN \n", + "6 30010 NaN NaN NaN \n", + "7 30041 NaN NaN NaN \n", + "8 30201 NaN NaN NaN \n", + ".. ... ... ... ... 
\n", + "47 NaN NaN 99463.0 NaN \n", + "48 NaN NaN 88253.0 NaN \n", + "49 NaN NaN 99462.0 NaN \n", + "50 NaN NaN 99467.0 NaN \n", + "51 NaN NaN 31048.0 NaN \n", + "\n", + "[3012 rows x 9 columns]" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_crosswalk" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dcb4a61d-d7f2-49f2-98f9-04bb1260b2c7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 1de124f3d78151541f63e4f9c3fdafc7a08f5162 Mon Sep 17 00:00:00 2001 From: Shweta Adhikari Date: Thu, 18 Jan 2024 23:05:48 +0000 Subject: [PATCH 2/4] ntd_2021_2022_crosswalk --- ntd/ntd_id_changes_crosswalk.ipynb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ntd/ntd_id_changes_crosswalk.ipynb b/ntd/ntd_id_changes_crosswalk.ipynb index 33ec347e3..56b8655f1 100644 --- a/ntd/ntd_id_changes_crosswalk.ipynb +++ b/ntd/ntd_id_changes_crosswalk.ipynb @@ -2078,6 +2078,14 @@ "crosswalk.head(2)" ] }, + { + "cell_type": "markdown", + "id": "747884de-7da6-4100-b8d1-d002a9e8bb13", + "metadata": {}, + "source": [ + "### Final Concatenation" + ] + }, { "cell_type": "code", "execution_count": 109, From 2ed772a342323c24fb83ae34c5a0ca89a1846786 Mon Sep 17 00:00:00 2001 From: Shweta Adhikari Date: Fri, 19 Jan 2024 01:15:24 +0000 Subject: [PATCH 3/4] Deleted blank file --- .../2021_2022_crosswalk.ipynb | 2 +- ntd/manual_crosswalk.csv | 53 + ntd/ntd_id_changes.ipynb | 2095 ----------------- 3 files changed, 54 insertions(+), 2096 deletions(-) rename Untitled.ipynb => 
ntd/2021_2022_crosswalk.ipynb (92%) create mode 100644 ntd/manual_crosswalk.csv delete mode 100644 ntd/ntd_id_changes.ipynb diff --git a/Untitled.ipynb b/ntd/2021_2022_crosswalk.ipynb similarity index 92% rename from Untitled.ipynb rename to ntd/2021_2022_crosswalk.ipynb index 063c20ec1..5190d7888 100644 --- a/Untitled.ipynb +++ b/ntd/2021_2022_crosswalk.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": null, - "id": "834539b7-de8e-47ca-bb5c-30bc55fa1e2a", + "id": "1954eed3-c549-4130-85e4-853c9a1b1fae", "metadata": {}, "outputs": [], "source": [] diff --git a/ntd/manual_crosswalk.csv b/ntd/manual_crosswalk.csv new file mode 100644 index 000000000..9078ca3f8 --- /dev/null +++ b/ntd/manual_crosswalk.csv @@ -0,0 +1,53 @@ +ntd_id_2022 ,reported_by_name,legacy_ntd_id,agency_name,city,ntd_id_2021,Remarks +231,,0T33,Confederated Tribes of Warm Springs,Warm Springs,,No 2021 data +90306,,,Stanislaus Regional Transit Authority,Modesto,,the county doesn’t have data after 2021 so could be the same data +44982,Georgia Department of Transportation,,Southern Georgia Regional Commission,Valdosta,,Lowndes county has no data after 2021 so could be the same data +40199,,,Autonomous Municipality of Vega Alta,Vega Alta,,No 2021 data +41182,Municipality of Comerio,1R05-003,Comerio,,,no 2021 data but has 2020 data under legacy id 4R09-41182 +10008,Massachusetts Department of Transportation ,,Pioneer Valley Transit Authority Intercity Bus,Springfield,, +99394,,9T29,Elko Band Council,Elko,,No 2021 data +20228,,,"Enterprise Holdings, LLC",Syracuse,, +66345,,,Taos Pueblo,Taos,, +90307,,,Town of Prescott Valley,Prescott Valley,, +20227,,,Town of Wallkill,Middletown,, +55665,Indiana Department of Transportation,,Brown County YMCA,Nashville,, +80299,,,"Silver Key Senior Services, Inc",Colorado Springs,, +90310,,,Tulare County Regional Transit Agency,Visalia,, +30205,,,"Transit Management of Central Maryland, Inc.",Annapolis Junction,, +80138,Colorado Department of 
Transportation,8R01-043,"Dynamic Dimensions, Inc.",Burlington,, +40274,,,Town of Morrisville,Morrisville,, +80179,Colorado Department of Transportation,8R01-029,City of Burlington,Burlington,, +60276,,,Plaquemines Port Harbor Terminal District,Belle Chasse,, +88239,Utah Department of Transportation,,High Valley Transit District,,, +50525,,,Hamilton County,Noblesville,, +80211,Colorado Department of Transportation,8R01-033,Town of Limon,Limon,, +44983,Mississippi Department of Transportation,,Philadelphia Transit,Philadelphia,, +88236,Utah Department of Transportation,,Washington County (on behalf of Council On Aging),St. George,, +11246,,,Vermont Agency of Transportation,Barre,, +31047,West Virginia Department of Transportation,,Raleigh County Commission on Aging,Beckley,, +50526,,,"Hancock County, Indiana",Greenfield,, +88268,Utah Department of Transportation,,Cache Employment & Training Center (CETC),Logan,, +90309,,,City of Casa Grande,Casa Grande,, +99460,Arizona Department of Transportation,,Verde Valley Caregivers Coalition,Sedona,, +99461,Arizona Department of Transportation,,Town of Florence,Florence,, +88242,Utah Department of Transportation,,Sevier County ,Richfield,, +30998,West Virginia Department of Transportation,,Ritchie County Integrated Family Services,Harrisville,, +88245,Utah Department of Transportation,,Piute County Senior Citizen Center,Junction,, +88265,Utah Department of Transportation,,Common Ground Outdoor Adventures,Logan,, +99465,Arizona Department of Transportation,,Easterseals Blake Foundation,Tucson,, +55666,Ohio Department of Transportation,,Buckeye Community Services,Jackson,, +31014,West Virginia Department of Transportation,,Friends of Hansford Center,Saint Albans,, +99466,Arizona Department of Transportation,,Assist to Independence,Tuba City,, +99464,Arizona Department of Transportation,,Helping Ourselves Pursue Enrichment Incorporated,Tucson,, +55664,Indiana Department of Transportation,,Shelby Senior Services ,Shelbyville,, 
+88233,Utah Department of Transportation,,Uintah Healthcare Special Service District,Vernal,, +88304,Colorado Department of Transportation,,"Boulder, City of",Boulder,, +31049,West Virginia Department of Transportation,,Hardy County Committee on Aging Inc,Moorefield,, +31002,West Virginia Department of Transportation,,Neighbor Network/Pleasants County Senior Services,Saint Marys,, +31050,West Virginia Department of Transportation,,Council of Senior Tyler Countians Inc,Middlebourne,, +31046,West Virginia Department of Transportation,,"Commission On Aging Family Services Inc., The",Petersburg,, +99463,Arizona Department of Transportation,,New Horizons Disability Empowerment Center,Prescott Valley,, +88253,Utah Department of Transportation,,Kane County Senior Citizens Improvement Corp,Kanab,, +99462,Arizona Department of Transportation,,Saguaro Foundation Community Living Programs,Yuma,, +99467,Arizona Department of Transportation,,Town of springerville AZ,Springerville,, +31048,West Virginia Department of Transportation,,"Greenbrier Co. Committee on Aging, Inc.",Rupert,, diff --git a/ntd/ntd_id_changes.ipynb b/ntd/ntd_id_changes.ipynb deleted file mode 100644 index 6afeb77f2..000000000 --- a/ntd/ntd_id_changes.ipynb +++ /dev/null @@ -1,2095 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "207db9ce-8673-4dcd-be52-21ee4ed8d748", - "metadata": {}, - "source": [ - "# NTD 2021 vs 2022\n", - "\n", - "* Explore where / how much `ntd_id` has changed between 2021 and 2022 exports. \n", - "* Use BigQuery, from `mart_ntd` grab 2021 and 2022 and export as csv.\n", - "* Pass it through a variety of merges can help winnow down which ones we do need to manually reconcile.\n", - "* Go from more stringent merges (ids and names) to looser merges\n", - " * Parsing the `ntd_id` and grabbing the suffix portion can help, since there's a good batch where the `ntd_id` change in 2022 is a new prefix added, where `ntd_id_2022 = [xxxx-ntd_id_2021]`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "f1cd334d-29c7-4ea8-8530-1bde520627e6", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "#GCS_BUCKET = \"gs://calitp-ntd-data-products\"\n", - "#GCS_PATH = (f\"{GCS_BUCKET}annual-database-agency-information/\"\n", - "# \"dt=2023-11-15/ts=2023-11-15T22:29:51.925030+00:00/year=2022/\"\n", - "# \"annual-database-agency-information.jsonl.gz\"\n", - "# )\n", - "\n", - "LOCAL_PATH = \"ntd_2021_2022.csv\"" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "85b4ef76-811d-499e-94c1-166c9cd4b0d5", - "metadata": {}, - "outputs": [], - "source": [ - "df_full = pd.read_csv(LOCAL_PATH)\n", - "\n", - "df_2021 = df_full[df_full.year==2021].reset_index(drop=True)\n", - "df_2022 = df_full[df_full.year==2022].reset_index(drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "07b0ddbf-c3cb-421e-84e3-5f1a1c5c7b4b", - "metadata": {}, - "outputs": [], - "source": [ - "def basic_stats(df: pd.DataFrame): \n", - " cols = [\"ntd_id\", \"legacy_ntd_id\", \n", - " \"reported_by_name\", \n", - " \"agency_name\",\n", - " \"city\"\n", - " ]\n", - " for c in cols:\n", - " print(f\"nunique {c}: {df[c].nunique()}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "efdc6f10-95e0-4383-a903-f7a45d39a009", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "nunique ntd_id: 3021\n", - "nunique legacy_ntd_id: 2110\n", - "nunique reported_by_name: 64\n", - "nunique agency_name: 2929\n", - "nunique city: 1974\n" - ] - } - ], - "source": [ - "basic_stats(df_2021)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4946c23e-1f78-47ea-a800-7ef17fc0b088", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "nunique ntd_id: 2969\n", - "nunique legacy_ntd_id: 2092\n", - "nunique reported_by_name: 60\n", - 
"nunique agency_name: 2924\n", - "nunique city: 1964\n" - ] - } - ], - "source": [ - "basic_stats(df_2022)" - ] - }, - { - "cell_type": "markdown", - "id": "d4c30967-7586-4716-95fe-c9a8f838a98a", - "metadata": {}, - "source": [ - "## Full set of merge columns\n", - "\n", - "* `ntd_id, legacy_ntd_id, agency_name, reported_by_name, city`\n", - "\n", - "Probably the most complete set of identifiers" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "bdc41b2e-27be-45d2-9416-12067cf8607a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "left_only 1891\n", - "right_only 1887\n", - "both 1130\n", - "Name: _merge, dtype: int64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cols = [\"ntd_id\", \"legacy_ntd_id\", \n", - " \"reported_by_name\", \n", - " \"agency_name\",\n", - " \"city\", \n", - " ]\n", - "\n", - "m1 = pd.merge(\n", - " df_2021[cols + [\"key\", \"year\"]],\n", - " df_2022[cols + [\"key\", \"year\"]],\n", - " on = cols,\n", - " how = \"outer\",\n", - " indicator = True\n", - ")\n", - "\n", - "m1._merge.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "0e780b65-8e21-47c4-918f-c11d9433d1a6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "left_only 0.385289\n", - "right_only 0.384474\n", - "both 0.230236\n", - "Name: _merge, dtype: float64" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m1._merge.value_counts(normalize=True)" - ] - }, - { - "cell_type": "markdown", - "id": "78610d5e-e499-4117-bf37-19d7b0e3f516", - "metadata": {}, - "source": [ - "### Majority will merge if we don't merge on `ntd_id`, but use `legacy_ntd_id` and variations of name instead\n", - "\n", - "These could be solved if we just use `agency_name` and `reported_by_name`. 
Even though `ntd_id` is not necessarily the same, `legacy_ntd_id` appears to be (even if same means it's NaN for both years).\n", - "\n", - "Exclude `city` from merge, since there are some that change cities, but it's the same agency. We do want to know if city changes from year to year." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "55f77342-1d83-463d-863a-ad5e2144ab57", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ntd_idlegacy_ntd_idreported_by_nameagency_namecitykey_xyear_xkey_yyear_y_merge
3614702427R01-015Iowa Department of Transportation10-15 Regional Transit AgencyOttumwaNaNNaN49ba215d3793e5a58366b7dde1cb587e2022.0right_only
20027R01-702427R01-015Iowa Department of Transportation10-15 Regional Transit AgencyOttumwa4400da6a95a6a328ab9b7f651a1bdeb42021.0NaNNaNleft_only
963A0002-55329NaNStark Area Regional Transit AuthorityABCD, Inc.Canton463265c2d666621a7757630e5548fc3b2021.0NaNNaNleft_only
453655329NaNStark Area Regional Transit AuthorityABCD, Inc.CantonNaNNaN16b0e8ca1108b744db0f7952d8f6b3812022.0right_only
321188285NaNColorado Department of TransportationAEX - Alpine ExpressGunnisonNaNNaN00fc807db5fbd510c447e71e21069b192022.0right_only
.................................
21829R02-910709R02-019California Department of TransportationYosemite Area Regional Transportation SystemMercedf68cf9996e537ff2968339f4508dd60d2021.0NaNNaNleft_only
4269910709R02-019California Department of TransportationYosemite Area Regional Transportation SystemMercedNaNNaN1e8e914f03d459189a27bcdc2fc321062022.0right_only
442266320NaNTexas Department of TransportationZapata CountyZapataNaNNaNe19ba955b67fff1dc3fd906aad5ab8732022.0right_only
13906R05-66320NaNTexas Department of TransportationZapata CountyZapatadc5f81697aabb1d127a4c03225a6cdde2021.0NaNNaNleft_only
20256R04-66305NaNOklahoma Department of Transportationcity of MarlowMarlow05ff80e6366b6af2a59d99020f5f0d302021.0NaNNaNleft_only
\n", - "

3778 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " ntd_id legacy_ntd_id reported_by_name \\\n", - "3614 70242 7R01-015 Iowa Department of Transportation \n", - "2002 7R01-70242 7R01-015 Iowa Department of Transportation \n", - "963 A0002-55329 NaN Stark Area Regional Transit Authority \n", - "4536 55329 NaN Stark Area Regional Transit Authority \n", - "3211 88285 NaN Colorado Department of Transportation \n", - "... ... ... ... \n", - "2182 9R02-91070 9R02-019 California Department of Transportation \n", - "4269 91070 9R02-019 California Department of Transportation \n", - "4422 66320 NaN Texas Department of Transportation \n", - "1390 6R05-66320 NaN Texas Department of Transportation \n", - "2025 6R04-66305 NaN Oklahoma Department of Transportation \n", - "\n", - " agency_name city \\\n", - "3614 10-15 Regional Transit Agency Ottumwa \n", - "2002 10-15 Regional Transit Agency Ottumwa \n", - "963 ABCD, Inc. Canton \n", - "4536 ABCD, Inc. Canton \n", - "3211 AEX - Alpine Express Gunnison \n", - "... ... ... \n", - "2182 Yosemite Area Regional Transportation System Merced \n", - "4269 Yosemite Area Regional Transportation System Merced \n", - "4422 Zapata County Zapata \n", - "1390 Zapata County Zapata \n", - "2025 city of Marlow Marlow \n", - "\n", - " key_x year_x \\\n", - "3614 NaN NaN \n", - "2002 4400da6a95a6a328ab9b7f651a1bdeb4 2021.0 \n", - "963 463265c2d666621a7757630e5548fc3b 2021.0 \n", - "4536 NaN NaN \n", - "3211 NaN NaN \n", - "... ... ... \n", - "2182 f68cf9996e537ff2968339f4508dd60d 2021.0 \n", - "4269 NaN NaN \n", - "4422 NaN NaN \n", - "1390 dc5f81697aabb1d127a4c03225a6cdde 2021.0 \n", - "2025 05ff80e6366b6af2a59d99020f5f0d30 2021.0 \n", - "\n", - " key_y year_y _merge \n", - "3614 49ba215d3793e5a58366b7dde1cb587e 2022.0 right_only \n", - "2002 NaN NaN left_only \n", - "963 NaN NaN left_only \n", - "4536 16b0e8ca1108b744db0f7952d8f6b381 2022.0 right_only \n", - "3211 00fc807db5fbd510c447e71e21069b19 2022.0 right_only \n", - "... ... ... ... 
\n", - "2182 NaN NaN left_only \n", - "4269 1e8e914f03d459189a27bcdc2fc32106 2022.0 right_only \n", - "4422 e19ba955b67fff1dc3fd906aad5ab873 2022.0 right_only \n", - "1390 NaN NaN left_only \n", - "2025 NaN NaN left_only \n", - "\n", - "[3778 rows x 10 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m1[m1._merge != \"both\"].sort_values(\"agency_name\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "6d3a3ac0-345d-4931-9e75-9220dada96ce", - "metadata": {}, - "outputs": [], - "source": [ - "# Remove the keys that would now merge between the years\n", - "ok_keys = np.concatenate((\n", - " m1[m1._merge==\"both\"].key_x.unique(),\n", - " m1[m1._merge==\"both\"].key_y.unique()\n", - "))" - ] - }, - { - "cell_type": "markdown", - "id": "aefb7d2b-d1dc-4690-be39-622907dd9bb0", - "metadata": {}, - "source": [ - "### Merge on `legacy_ntd_id`, variations of name\n", - "These probably need to be manually addressed using a crosswalk, since we want to store variations of the `agency_name` over time." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "8129207f-dc83-4d2b-8a11-5238a0effed7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "both 1789\n", - "left_only 102\n", - "right_only 98\n", - "Name: _merge, dtype: int64" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Remove city from merge, since there are a couple\n", - "# that would merge but are set to diff cities\n", - "m2 = pd.merge(\n", - " df_2021[~df_2021.key.isin(ok_keys)][cols + [\"key\", \"year\"]],\n", - " df_2022[~df_2022.key.isin(ok_keys)][cols + [\"key\", \"year\"]],\n", - " on = [\"legacy_ntd_id\", \"reported_by_name\", \"agency_name\"],\n", - " how = \"outer\",\n", - " indicator = True\n", - ")\n", - "\n", - "m2._merge.value_counts()" - ] - }, - { - "cell_type": "markdown", - "id": "c1db5321-c1b4-41f4-a024-d6c8deb975e7", - "metadata": {}, - "source": [ - "Some of these are clearly the same agency when you spot check it (abbreviations, minor changes in name, etc), but some are less obvious. Might have to start compiling a larger crosswalk of variations on agency name.\n", - "\n", - "These would be grouped together (`ntd_id` changes...but with an additional prefix)\n", - "* Whitley County Commissioners\n", - "* Whitley County Council on Aging" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a1288ff4-ce8d-4dea-b192-0846edd0482d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ntd_id_xlegacy_ntd_idreported_by_nameagency_namecity_xkey_xyear_xntd_id_ycity_ykey_yyear_y_merge
13426R02-66284NaNLouisiana Department of TransportationAcadia COACrowley5127e6c6b2c3c3018cc2629cfa94f74e2021.0NaNNaNNaNNaNleft_only
1692A0004-00415NaNValley Regional TransitAda County Highway DistrictBoise8b1ef6431abe5e9a5eb5c0a1124477d92021.0NaNNaNNaNNaNleft_only
1928NaN5R02-020Indiana Department of TransportationArea 10 Council on Aging of Monroe CountyNaNNaNNaN50308Ellettsville00d4f9e499a70f66a8a72c0f4c42d4082022.0right_only
1971NaN5R02-017Indiana Department of TransportationArea IV Agency on Aging and Community Action P...NaNNaNNaN50365Lafayettea561a8b8df6a37f2dd4f141e4902d3462022.0right_only
1937NaNNaNArizona Department of TransportationAssist to IndependenceNaNNaNNaN99466Tuba City42b9d82b9be91d4b150f55f9e88a809f2022.0right_only
.......................................
15775R02-504685R02-039Indiana Department of TransportationWhitley County CommissionersColumbia City518ace07cb87072ec541dc7fef24e6f72021.0NaNNaNNaNNaNleft_only
1973NaN5R02-039Indiana Department of TransportationWhitley County Council on AgingNaNNaNNaN50468Columbia City0b058ae9f5d5a1e9780257c981a6b91c2022.0right_only
16866R02-66299NaNLouisiana Department of TransportationWinn COAWinnfield57e123ed53fdf3ce74769f216cc2c39d2021.0NaNNaNNaNNaNleft_only
1933NaN5R02-024Indiana Department of TransportationYMCA of VincennesNaNNaNNaN50392Vincennes13d170d217ad0cf7860e62ce136510042022.0right_only
12486R04-66305NaNOklahoma Department of Transportationcity of MarlowMarlow05ff80e6366b6af2a59d99020f5f0d302021.0NaNNaNNaNNaNleft_only
\n", - "

200 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " ntd_id_x legacy_ntd_id reported_by_name \\\n", - "1342 6R02-66284 NaN Louisiana Department of Transportation \n", - "1692 A0004-00415 NaN Valley Regional Transit \n", - "1928 NaN 5R02-020 Indiana Department of Transportation \n", - "1971 NaN 5R02-017 Indiana Department of Transportation \n", - "1937 NaN NaN Arizona Department of Transportation \n", - "... ... ... ... \n", - "1577 5R02-50468 5R02-039 Indiana Department of Transportation \n", - "1973 NaN 5R02-039 Indiana Department of Transportation \n", - "1686 6R02-66299 NaN Louisiana Department of Transportation \n", - "1933 NaN 5R02-024 Indiana Department of Transportation \n", - "1248 6R04-66305 NaN Oklahoma Department of Transportation \n", - "\n", - " agency_name city_x \\\n", - "1342 Acadia COA Crowley \n", - "1692 Ada County Highway District Boise \n", - "1928 Area 10 Council on Aging of Monroe County NaN \n", - "1971 Area IV Agency on Aging and Community Action P... NaN \n", - "1937 Assist to Independence NaN \n", - "... ... ... \n", - "1577 Whitley County Commissioners Columbia City \n", - "1973 Whitley County Council on Aging NaN \n", - "1686 Winn COA Winnfield \n", - "1933 YMCA of Vincennes NaN \n", - "1248 city of Marlow Marlow \n", - "\n", - " key_x year_x ntd_id_y city_y \\\n", - "1342 5127e6c6b2c3c3018cc2629cfa94f74e 2021.0 NaN NaN \n", - "1692 8b1ef6431abe5e9a5eb5c0a1124477d9 2021.0 NaN NaN \n", - "1928 NaN NaN 50308 Ellettsville \n", - "1971 NaN NaN 50365 Lafayette \n", - "1937 NaN NaN 99466 Tuba City \n", - "... ... ... ... ... 
\n", - "1577 518ace07cb87072ec541dc7fef24e6f7 2021.0 NaN NaN \n", - "1973 NaN NaN 50468 Columbia City \n", - "1686 57e123ed53fdf3ce74769f216cc2c39d 2021.0 NaN NaN \n", - "1933 NaN NaN 50392 Vincennes \n", - "1248 05ff80e6366b6af2a59d99020f5f0d30 2021.0 NaN NaN \n", - "\n", - " key_y year_y _merge \n", - "1342 NaN NaN left_only \n", - "1692 NaN NaN left_only \n", - "1928 00d4f9e499a70f66a8a72c0f4c42d408 2022.0 right_only \n", - "1971 a561a8b8df6a37f2dd4f141e4902d346 2022.0 right_only \n", - "1937 42b9d82b9be91d4b150f55f9e88a809f 2022.0 right_only \n", - "... ... ... ... \n", - "1577 NaN NaN left_only \n", - "1973 0b058ae9f5d5a1e9780257c981a6b91c 2022.0 right_only \n", - "1686 NaN NaN left_only \n", - "1933 13d170d217ad0cf7860e62ce13651004 2022.0 right_only \n", - "1248 NaN NaN left_only \n", - "\n", - "[200 rows x 12 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m2[m2._merge != \"both\"].sort_values([\"agency_name\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "c3e6dd34-1eea-421e-819a-c7b69431352a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['Acadia COA', 'Ada County Highway District',\n", - " 'Area 10 Council on Aging of Monroe County',\n", - " 'Area IV Agency on Aging and Community Action Programs ',\n", - " 'Assist to Independence', 'Atlanta-Region Transit Link Authority',\n", - " 'Autonomous Municipality of Vega Alta', 'Bacon County',\n", - " 'Baltimore County Department of Aging',\n", - " 'Baltimore County Department of Public Works Transportation',\n", - " 'Bay State Cruise Company', 'Bay State LLC', 'Berrien County',\n", - " 'Blue River Services ', 'Boone County Commissioners',\n", - " 'Boone County Senior Services', 'Boulder, City of',\n", - " 'Brantley County', 'Brooks County Transit',\n", - " 'Brown County Senior Citizens Council', 'Brown County YMCA',\n", - " 'Buckeye Community Services', 'CENTRAL MISSISSIPPI INC',\n", - 
" 'CHANDLER, CITY OF', 'Cache Employment & Training Center (CETC)',\n", - " 'Calcasieu Voluntary Council in Aging', 'Cardinal Services ',\n", - " 'Cass County Commissioners', 'Cass County Council on Aging ',\n", - " 'Central City ', 'Central Mississippi, Incorporated',\n", - " 'City of Benkelman', 'City of Buchanan', 'City of Burlington',\n", - " 'City of Casa Grande', 'City of Dinuba', 'City of Elk Grove',\n", - " 'City of Modesto ', 'City of Tulare', 'City of Woodlake',\n", - " 'Clay Senior & Community Services, Inc.',\n", - " 'Clinton County Commissioners',\n", - " 'Commission On Aging Family Services Inc., The',\n", - " 'Common Ground Outdoor Adventures',\n", - " 'Concordia Senior Citizen Center',\n", - " 'Confederated Tribes of Warm Springs', 'Cook County Transit',\n", - " 'Coquille Indian Tribe', 'Council of Senior Tyler Countians Inc',\n", - " 'Coweta County', 'DATTCO, Inc. ', 'DeKalb County Commissioners',\n", - " 'DeKalb County Council on Aging ', 'Donaldsonville ARC',\n", - " 'Dynamic Dimensions, Inc.', 'Easterseals Blake Foundation',\n", - " 'Elko Band Council', 'Enterprise Holdings, LLC',\n", - " 'Enterprise Rideshare - Michigan',\n", - " 'Fayette Community Commissioners',\n", - " 'Fayette County Council Agency on Aging and Aged',\n", - " 'Four Rivers Resource Services', 'Franklin County Commissioners',\n", - " 'Franklin County Senior Services ', 'Friends of Hansford Center',\n", - " 'Fulton County Commissioners', 'Fulton County Council on Aging ',\n", - " 'GOODYEAR, CITY OF',\n", - " 'Greater Cincinnati Behavioral Health Services ',\n", - " 'Greenbrier Co. 
Committee on Aging, Inc.', 'Greyhound Lines, Inc.',\n", - " 'Hamilton County', 'Hancock County, Indiana',\n", - " 'Hardy County Committee on Aging Inc',\n", - " 'Harrison County Commissioners',\n", - " 'Helping Ourselves Pursue Enrichment Incorporated',\n", - " 'High Valley Transit District', 'Historic Hoosier Hills',\n", - " 'Huntington County Commissioners',\n", - " 'Huntington County Council on Aging ',\n", - " 'Indianapolis and Marion County Public Transportation',\n", - " 'Isanti County', 'Jalbert Leasing, Inc. ',\n", - " 'Jay County Commissioners', 'Jefferson Bus Lines', 'Jefferson COA',\n", - " 'Kane County Senior Citizens Improvement Corp',\n", - " 'Knox County Commissioners', 'Kosciusko County Commissioners',\n", - " 'LIFT COMMUNITY ACTION AGENCY INC LIFT',\n", - " 'LaGrange County Commissioners',\n", - " 'LaGrange County Council on Aging ', 'Lafayette COA',\n", - " 'LifeStream Services ', 'LifeTime Resources ',\n", - " 'Little Dixie Community Action Agency, Inc.',\n", - " 'Living Wells in Wabash County Council on Aging ',\n", - " 'Lowndes County', 'MESA, CITY OF', 'Marshall County Commissioners',\n", - " 'Marshall County Council on Aging ', 'Miami County Commissioners',\n", - " 'Miami County YMCA ', 'Michigan Department of Transportation',\n", - " 'Monroe County Commissioners', 'Monroe County Council on Aging',\n", - " 'Municipality of Anasco', 'Municipality of Añasco',\n", - " 'Municipality of Comerio', 'NEW BOURBON REGIONAL PORT AUTH',\n", - " 'Native Village of Crooked Creek',\n", - " 'Neighbor Network/Pleasants County Senior Services',\n", - " 'New Bourbon Regional Port Authority',\n", - " 'New Horizons Disability Empowerment Center',\n", - " 'Noble County Commissioners', 'Noble County Council on Aging ',\n", - " 'Northeast Transportation Services', 'Orange County Commissioners',\n", - " 'Orange County First Chance Center ', 'Ouachita ARC',\n", - " 'Paul Philippe Resource Center ', 'Philadelphia Transit',\n", - " 'Pierce County Transit',\n", - " 
'Pioneer Valley Transit Authority Intercity Bus',\n", - " 'Piute County Senior Citizen Center',\n", - " 'Plaquemines Parish Government',\n", - " 'Plaquemines Port Harbor Terminal District',\n", - " 'Playa Vista Parks and Landscape Corp.',\n", - " 'Port of Administration Authority',\n", - " 'Puerto Rico Highway and Transportation Authority - Público',\n", - " 'Puerto Rico Highway and Transportation Authority – Público',\n", - " 'Raleigh County Commission on Aging',\n", - " 'Regional Transportation Authority of Pima County',\n", - " 'Ritchie County Integrated Family Services',\n", - " 'Rush County Commissioners', 'Rush County Senior Services ',\n", - " 'Saguaro Foundation Community Living Programs',\n", - " 'Salt Lake Express', 'Sevier County ', 'Shelby Senior Services ',\n", - " 'Shoshoni Senior Citizens', 'Shoshoni Senior Citizens Center',\n", - " 'Silver Key Senior Services, Inc', 'Sistersville Ferry ',\n", - " 'Southern Georgia Regional Commission',\n", - " 'Southern Indiana Development Commission', 'Sphere, Inc',\n", - " 'Stanislaus County ', 'Stanislaus Regional Transit Authority',\n", - " 'Steuben County Commissioners', 'Steuben County Council on Aging',\n", - " 'Sunrise Northeast, Inc.', 'Taos Pueblo', 'The Shuttle, Inc.',\n", - " 'Tift Transit System', 'Tippecanoe County Commissioners',\n", - " 'Town of Avon (CT)', 'Town of Beacon Falls', 'Town of Canton',\n", - " 'Town of Florence', 'Town of Limon', 'Town of Litchfield',\n", - " 'Town of Mansfield', 'Town of Montville', 'Town of Morrisville',\n", - " 'Town of Oro Valley ', 'Town of Prescott Valley',\n", - " 'Town of Roxbury', 'Town of Sprague', 'Town of Wallkill',\n", - " 'Town of springerville AZ',\n", - " 'Transit Management of Central Maryland, Inc.',\n", - " 'Tucker County Senior Citizens',\n", - " 'Tulare County Regional Transit Agency', 'Turner County ',\n", - " 'Uintah Healthcare Special Service District',\n", - " 'Union County Commissioners', 'Union County Council on Aging ',\n", - " 'Verde Valley 
Caregivers Coalition',\n", - " 'Vermont Agency of Transportation', 'Wabash County Commissioners',\n", - " 'Ware County',\n", - " 'Washington County (on behalf of Council On Aging)',\n", - " 'Wells County Commissioners', 'Wells County Council on Aging ',\n", - " 'Wet Mountain Valley Rotary Community Service Inc.',\n", - " 'Wet Mountain Valley Rotary Community Service, Inc.',\n", - " 'White County Commissioners', 'White County Council on Aging ',\n", - " 'Whitley County Commissioners', 'Whitley County Council on Aging ',\n", - " 'Winn COA', 'YMCA of Vincennes ', 'city of Marlow'], dtype=object)" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m2[m2._merge != \"both\"].sort_values([\"agency_name\"]).agency_name.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "574bdf2c-ec56-44c9-932f-49f7132c65e5", - "metadata": {}, - "outputs": [], - "source": [ - "# Remove the keys that would now merge between the years\n", - "ok_keys2 = np.concatenate((\n", - " ok_keys,\n", - " m2[m2._merge == \"both\"].key_x.unique(),\n", - " m2[m2._merge == \"both\"].key_y.unique()\n", - "))" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c455cd0e-3722-40f5-b721-4aa1c3f48dab", - "metadata": {}, - "outputs": [], - "source": [ - "def ntd_id_parsed(df: pd.DataFrame):\n", - " df = df.assign(\n", - " ntd_id_no_prefix = df.apply(\n", - " lambda x:\n", - " x.ntd_id.split(\"-\")[1] if \"-\" in x.ntd_id\n", - " else x.ntd_id, \n", - " axis=1)\n", - " )\n", - " \n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "6f310097-f6c0-4a55-8231-ddf7eb9b69fe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "left_only 61\n", - "right_only 54\n", - "both 41\n", - "Name: _merge, dtype: int64" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m3 = pd.merge(\n", - " 
df_2021[~df_2021.key.isin(ok_keys2)][cols + [\"key\", \"year\"]].pipe(ntd_id_parsed),\n", - " df_2022[~df_2022.key.isin(ok_keys2)][cols + [\"key\", \"year\"]].pipe(ntd_id_parsed),\n", - " on = [\"ntd_id_no_prefix\", \"legacy_ntd_id\",],\n", - " how = \"outer\",\n", - " indicator = True\n", - ")\n", - "\n", - "m3._merge.value_counts()" - ] - }, - { - "cell_type": "markdown", - "id": "ca15336a-42d3-4d8e-94c1-9142bd4f5394", - "metadata": {}, - "source": [ - "### Parsing `ntd_id` into a no-prefix version can help \n", - "\n", - "If we are going to remove the prefix...we could do this earlier and hopefully get more to merge. Although, this does mean that we are left with variations on `agency_name` and `reported_by_name`, which still needs to make it into our crosswalk, even if we do not use it to merge.\n", - "\n", - "Left with a batch of about 60 to reconcile manually." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "a3645517-1cbe-42b0-bd1a-3c080f64bd9f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ntd_id_xlegacy_ntd_idreported_by_name_xagency_name_xcity_xkey_xyear_xntd_id_no_prefixntd_id_yreported_by_name_yagency_name_ycity_ykey_yyear_y_merge
6111238NaNNaNBay State Cruise CompanyNaN3e52a7c9c6b2b668f3f8d8aaf7407bb12021.01123811238NaNBay State LLCBoston0d5ba39814c8d079274b8a9f0cc570942022.0both
563R03-301303R03-010Maryland Department of TransportationBaltimore County Department of AgingBaltimorea78720414cc2f8110bc995522b13af5f2021.03013030130Maryland Department of TransportationBaltimore County Department of Public Works Tr...Towson82625c40cb5f233003bd99bf662c0f8c2022.0both
24401054105NaNPuerto Rico Highway and Transportation Authori...San Juan15dfe7c9516dd80cfcbd7e44894c3cd02021.04010540105NaNPuerto Rico Highway and Transportation Authori...San Juan671fb05927de6f93c0b7fbc231ba0c3f2022.0both
3040269NaNNaNMunicipality of AnascoAnasco727e463669495f24983f88e0c1ebc84e2021.04026940269NaNMunicipality of AñascoAnascobf314c9d218b6b33879aca0e15a0aec62022.0both
634R03-411334R03-115Georgia Department of TransportationCoweta CountyNewnana2b14dc5773bf3c0ad04dfe1757d3be62021.04113341133NaNCoweta CountyNewnandd0a411799e522f52608bb1977d5a1492022.0both
24R05-44979NaNMississippi Department of TransportationCENTRAL MISSISSIPPI INCWinonaa526f2abfd1780150ba2b794124d73432021.04497944979Mississippi Department of TransportationCentral Mississippi, IncorporatedWinonab1c19267d796b92f9da7b0491ac539e72022.0both
40501935193NaNEnterprise Rideshare - MichiganFarmington Hillsfb118835888478e280646363880222b42021.05019350193NaNMichigan Department of TransportationNaN5db618a2d5318c4dff590a53f339688a2022.0both
1015R02-502305R02-011Indiana Department of TransportationBoone County CommissionersLebanon75778eabc386b8eafba4c9ecdba3400e2021.05023050230Indiana Department of TransportationBoone County Senior ServicesLebanon51057c0cb82672b5f5c59afd1e9217ef2022.0both
255R02-502465R02-012Indiana Department of TransportationMarshall County CommissionersPlymouth77570092c7163f61fcf99dd2fbaa8d162021.05024650246Indiana Department of TransportationMarshall County Council on AgingPlymouth67f53fd56b632e5a1edbcbc9beb819cb2022.0both
495R02-502485R02-043Indiana Department of TransportationSteuben County CommissionersAngola09864a8b4fdbf162b3d9a0434a63efa92021.05024850248Indiana Department of TransportationSteuben County Council on AgingAngola68c0951ae10e643259fbad16a68b05012022.0both
965R02-502765R02-032Indiana Department of TransportationSouthern Indiana Development CommissionLoogootee075d2df66f3c9d3f4b306cec2d8ecf882021.05027650276Indiana Department of TransportationFour Rivers Resource ServicesLoogooteee8c4337e4072dcff70836d5aa72d6d0e2022.0both
525R02-502805R02-042Indiana Department of TransportationRush County CommissionersRushville3e35f464fafa37874c5479eba6a8de392021.05028050280Indiana Department of TransportationRush County Senior ServicesRushvilled3ffb9b7a07a22227815c8c8503847772022.0both
755R02-502815R02-002Indiana Department of TransportationCass County CommissionersLogansport10cea1f86819cd03704ddad23d6ed6b22021.05028150281Indiana Department of TransportationCass County Council on AgingLogansport021a9930cc37edb3d2b04e0f47c0518f2022.0both
145R02-502895R02-016Indiana Department of TransportationKosciusko County CommissionersWarsaw7d5e5c112a450de2882acc863ffbf3472021.05028950289Indiana Department of TransportationCardinal ServicesWarsaw4e91b967ea275a6f3bd9d748e07552992022.0both
545R02-503055R02-044Indiana Department of TransportationLaGrange County CommissionersLagrange46e1f9e708e4bdc8dfc3b8b11b12c58f2021.05030550305Indiana Department of TransportationLaGrange County Council on AgingLagrange06013a9919f57f65e7e19604d97fd8762022.0both
685R02-503075R02-037Indiana Department of TransportationJay County CommissionersYorktowne62a7d09f50c6891a7f7b49711ceded02021.05030750307Indiana Department of TransportationLifeStream ServicesYorktown24ad33959cbe7492784bed1b191484482022.0both
945R02-503085R02-020Indiana Department of TransportationMonroe County CommissionersEllettsville74f575bf059841b71a869fd3efd66f4f2021.05030850308Indiana Department of TransportationArea 10 Council on Aging of Monroe CountyEllettsville00d4f9e499a70f66a8a72c0f4c42d4082022.0both
105R02-503245R02-026Indiana Department of TransportationHistoric Hoosier HillsDillsboroe6710aa2c700355db16170c3b148a71d2021.05032450324Indiana Department of TransportationLifeTime ResourcesDillsboro8b9f8b88b29442184defa14305fd5cbf2022.0both
375R02-503305R02-041Indiana Department of TransportationDeKalb County CommissionersAuburnea5f5035fe8120cb93533e55883674df2021.05033050330Indiana Department of TransportationDeKalb County Council on AgingAuburnf86b7873b1bf7f66462e1126e2e6cba82022.0both
415R02-503475R02-031Indiana Department of TransportationHarrison County CommissionersGeorgetown8af031820471773675f7d78ef0ffbb042021.05034750347Indiana Department of TransportationBlue River ServicesCorydon5c6ef7763f8d3c56e2116e821a31f8692022.0both
825R02-503615R02-010Indiana Department of TransportationHuntington County CommissionersHuntington3f28a1bf0e4b0718de390209ebd536bb2021.05036150361Indiana Department of TransportationHuntington County Council on AgingHuntingtonedbf0f943976f37a77cff85b990040942022.0both
115R02-503655R02-017Indiana Department of TransportationTippecanoe County CommissionersLafayette8cff92c1747d39b61233f3134a9b2f502021.05036550365Indiana Department of TransportationArea IV Agency on Aging and Community Action P...Lafayettea561a8b8df6a37f2dd4f141e4902d3462022.0both
765R02-503875R02-038Indiana Department of TransportationUnion County CommissionersLibertyc3620a86511824ab2f1dbc60412614192021.05038750387Indiana Department of TransportationUnion County Council on AgingLibertye5314f96a7b7c1c48612fa62b2f88a7c2022.0both
265R02-503895R02-019Indiana Department of TransportationOrange County CommissionersPaoli68cce0722e9a67b873de8d28850aa9aa2021.05038950389Indiana Department of TransportationOrange County First Chance CenterPaoli7ac967575d36aa7c14596cfd5dc679c72022.0both
595R02-503925R02-024Indiana Department of TransportationKnox County CommissionersVincennes33a64f3c79c604d28a89374170b0700a2021.05039250392Indiana Department of TransportationYMCA of VincennesVincennes13d170d217ad0cf7860e62ce136510042022.0both
05R02-503995R02-004Indiana Department of TransportationFayette Community CommissionersConnersvilleeeae713c46c405e3c8d4f001df04ab082021.05039950399Indiana Department of TransportationFayette County Council Agency on Aging and AgedConnersville4e3cd53439c5b16a8db255353ab5f4522022.0both
425R02-504035R02-029Indiana Department of TransportationWells County CommissionersBluffton0959996691b94d855104db0d33b2e1c12021.05040350403Indiana Department of TransportationWells County Council on AgingBluffton0686e12cdf0e5053047e63ff96e7c83b2022.0both
155R02-504225R02-021Indiana Department of TransportationNoble County CommissionersAlbion019f5b9e0cc9b402ee30dfbc52e708892021.05042250422Indiana Department of TransportationNoble County Council on AgingAlbion5ef553988ed5ffe7abf562f9700c5e682022.0both
385R02-504415R02-030Indiana Department of TransportationWabash County CommissionersWabash95e83cbe96f8b149cf091a688e4efd7e2021.05044150441Indiana Department of TransportationLiving Wells in Wabash County Council on AgingWabash2547e2ebe22ddcb01ae9adedd3d38a4b2022.0both
745R02-504445R02-028Indiana Department of TransportationWhite County CommissionersMonticello3e9bd5cfa85e2e200430e73cd794e4eb2021.05044450444Indiana Department of TransportationWhite County Council on AgingMonticello91d0c6fd3810f8971287135ca843990d2022.0both
75R02-504545R02-023Indiana Department of TransportationMiami County CommissionersPerue0b549ebd3208c1adf7e8daff9b02e562021.05045450454Indiana Department of TransportationMiami County YMCAPeru8a1db7fdef4bd99effcad121ce960a7a2022.0both
845R02-504685R02-039Indiana Department of TransportationWhitley County CommissionersColumbia City518ace07cb87072ec541dc7fef24e6f72021.05046850468Indiana Department of TransportationWhitley County Council on AgingColumbia City0b058ae9f5d5a1e9780257c981a6b91c2022.0both
875R02-504835R02-005Indiana Department of TransportationFranklin County CommissionersBrookville588365cb83b4b249b502b797bf2172992021.05048350483Indiana Department of TransportationFranklin County Senior ServicesBrookvilleb76b01f10dcbcd6616d05ff50d86b3532022.0both
605R02-504845R02-006Indiana Department of TransportationFulton County CommissionersRochesterc15d9675ebd9e79a1e9b21ce171c907c2021.05048450484Indiana Department of TransportationFulton County Council on AgingRochester938d6b2a2c0297e4b2d77ddece412a5d2022.0both
475R02-504995R02-003Indiana Department of TransportationClinton County CommissionersFrankfortae316730728efa5b437e71f8a123e5102021.05049950499Indiana Department of TransportationPaul Philippe Resource CenterFrankfort0ea094bd3b3b713d00eb383fec67b8492022.0both
77601276127NaNPlaquemines Parish GovernmentBelle Chasse507c162cbce391ef4ac5db7e7e420b0d2021.06012760127Louisiana Department of TransportationPlaquemines Parish GovernmentBelle Chasse68bcb32f91c2e25018dfff92f2f3739a2022.0both
46R04-601916R04-002Oklahoma Department of TransportationLittle Dixie Community Action Agency, Inc.Hugoe60a5ce844c6fc0427a182ccb89190a42021.06019160191Oklahoma Department of TransportationLIFT COMMUNITY ACTION AGENCY INC LIFTHugo3bfbdeaa0429bd8cd0fddc2f598926612022.0both
43A0025-66344NaNNorth Central Texas Council of GovernmentsNortheast Transportation ServicesFort Worth6078b40173c8a29881a4e217181547d62021.06634466344NaNNortheast Transportation ServicesFort Worthd7f9a702076e56039e38b4deb3798b492022.0both
297R03-77086NaNMissouri Department of TransportationNEW BOURBON REGIONAL PORT AUTHPerryville76f5a9389da3eb3cf3060057c880c3f62021.07708677086Missouri Department of TransportationNew Bourbon Regional Port AuthorityPerryville66ac0917e90aea3a1df01b4303967ec32022.0both
278R01-801188R01-036Colorado Department of TransportationWet Mountain Valley Rotary Community Service Inc.Westcliffe64c2d7f92534e463125d4f208bf04f042021.08011880118Colorado Department of TransportationWet Mountain Valley Rotary Community Service, ...Westcliffea2a83e9259a69aec792f7b67e3292d062022.0both
648R05-801278R05-046Wyoming Department of TransportationShoshoni Senior CitizensNaN783e80db49b656ed4e4eb77c4c25e9742021.08012780127Wyoming Department of TransportationShoshoni Senior Citizens CenterShoshonia3674c39d8e7ff4026b491956dc201932022.0both
\n", - "
" - ], - "text/plain": [ - " ntd_id_x legacy_ntd_id reported_by_name_x \\\n", - "61 11238 NaN NaN \n", - "56 3R03-30130 3R03-010 Maryland Department of Transportation \n", - "24 40105 4105 NaN \n", - "30 40269 NaN NaN \n", - "63 4R03-41133 4R03-115 Georgia Department of Transportation \n", - "2 4R05-44979 NaN Mississippi Department of Transportation \n", - "40 50193 5193 NaN \n", - "101 5R02-50230 5R02-011 Indiana Department of Transportation \n", - "25 5R02-50246 5R02-012 Indiana Department of Transportation \n", - "49 5R02-50248 5R02-043 Indiana Department of Transportation \n", - "96 5R02-50276 5R02-032 Indiana Department of Transportation \n", - "52 5R02-50280 5R02-042 Indiana Department of Transportation \n", - "75 5R02-50281 5R02-002 Indiana Department of Transportation \n", - "14 5R02-50289 5R02-016 Indiana Department of Transportation \n", - "54 5R02-50305 5R02-044 Indiana Department of Transportation \n", - "68 5R02-50307 5R02-037 Indiana Department of Transportation \n", - "94 5R02-50308 5R02-020 Indiana Department of Transportation \n", - "10 5R02-50324 5R02-026 Indiana Department of Transportation \n", - "37 5R02-50330 5R02-041 Indiana Department of Transportation \n", - "41 5R02-50347 5R02-031 Indiana Department of Transportation \n", - "82 5R02-50361 5R02-010 Indiana Department of Transportation \n", - "11 5R02-50365 5R02-017 Indiana Department of Transportation \n", - "76 5R02-50387 5R02-038 Indiana Department of Transportation \n", - "26 5R02-50389 5R02-019 Indiana Department of Transportation \n", - "59 5R02-50392 5R02-024 Indiana Department of Transportation \n", - "0 5R02-50399 5R02-004 Indiana Department of Transportation \n", - "42 5R02-50403 5R02-029 Indiana Department of Transportation \n", - "15 5R02-50422 5R02-021 Indiana Department of Transportation \n", - "38 5R02-50441 5R02-030 Indiana Department of Transportation \n", - "74 5R02-50444 5R02-028 Indiana Department of Transportation \n", - "7 5R02-50454 5R02-023 Indiana Department of 
Transportation \n", - "84 5R02-50468 5R02-039 Indiana Department of Transportation \n", - "87 5R02-50483 5R02-005 Indiana Department of Transportation \n", - "60 5R02-50484 5R02-006 Indiana Department of Transportation \n", - "47 5R02-50499 5R02-003 Indiana Department of Transportation \n", - "77 60127 6127 NaN \n", - "4 6R04-60191 6R04-002 Oklahoma Department of Transportation \n", - "43 A0025-66344 NaN North Central Texas Council of Governments \n", - "29 7R03-77086 NaN Missouri Department of Transportation \n", - "27 8R01-80118 8R01-036 Colorado Department of Transportation \n", - "64 8R05-80127 8R05-046 Wyoming Department of Transportation \n", - "\n", - " agency_name_x city_x \\\n", - "61 Bay State Cruise Company NaN \n", - "56 Baltimore County Department of Aging Baltimore \n", - "24 Puerto Rico Highway and Transportation Authori... San Juan \n", - "30 Municipality of Anasco Anasco \n", - "63 Coweta County Newnan \n", - "2 CENTRAL MISSISSIPPI INC Winona \n", - "40 Enterprise Rideshare - Michigan Farmington Hills \n", - "101 Boone County Commissioners Lebanon \n", - "25 Marshall County Commissioners Plymouth \n", - "49 Steuben County Commissioners Angola \n", - "96 Southern Indiana Development Commission Loogootee \n", - "52 Rush County Commissioners Rushville \n", - "75 Cass County Commissioners Logansport \n", - "14 Kosciusko County Commissioners Warsaw \n", - "54 LaGrange County Commissioners Lagrange \n", - "68 Jay County Commissioners Yorktown \n", - "94 Monroe County Commissioners Ellettsville \n", - "10 Historic Hoosier Hills Dillsboro \n", - "37 DeKalb County Commissioners Auburn \n", - "41 Harrison County Commissioners Georgetown \n", - "82 Huntington County Commissioners Huntington \n", - "11 Tippecanoe County Commissioners Lafayette \n", - "76 Union County Commissioners Liberty \n", - "26 Orange County Commissioners Paoli \n", - "59 Knox County Commissioners Vincennes \n", - "0 Fayette Community Commissioners Connersville \n", - "42 Wells County 
Commissioners Bluffton \n", - "15 Noble County Commissioners Albion \n", - "38 Wabash County Commissioners Wabash \n", - "74 White County Commissioners Monticello \n", - "7 Miami County Commissioners Peru \n", - "84 Whitley County Commissioners Columbia City \n", - "87 Franklin County Commissioners Brookville \n", - "60 Fulton County Commissioners Rochester \n", - "47 Clinton County Commissioners Frankfort \n", - "77 Plaquemines Parish Government Belle Chasse \n", - "4 Little Dixie Community Action Agency, Inc. Hugo \n", - "43 Northeast Transportation Services Fort Worth \n", - "29 NEW BOURBON REGIONAL PORT AUTH Perryville \n", - "27 Wet Mountain Valley Rotary Community Service Inc. Westcliffe \n", - "64 Shoshoni Senior Citizens NaN \n", - "\n", - " key_x year_x ntd_id_no_prefix ntd_id_y \\\n", - "61 3e52a7c9c6b2b668f3f8d8aaf7407bb1 2021.0 11238 11238 \n", - "56 a78720414cc2f8110bc995522b13af5f 2021.0 30130 30130 \n", - "24 15dfe7c9516dd80cfcbd7e44894c3cd0 2021.0 40105 40105 \n", - "30 727e463669495f24983f88e0c1ebc84e 2021.0 40269 40269 \n", - "63 a2b14dc5773bf3c0ad04dfe1757d3be6 2021.0 41133 41133 \n", - "2 a526f2abfd1780150ba2b794124d7343 2021.0 44979 44979 \n", - "40 fb118835888478e280646363880222b4 2021.0 50193 50193 \n", - "101 75778eabc386b8eafba4c9ecdba3400e 2021.0 50230 50230 \n", - "25 77570092c7163f61fcf99dd2fbaa8d16 2021.0 50246 50246 \n", - "49 09864a8b4fdbf162b3d9a0434a63efa9 2021.0 50248 50248 \n", - "96 075d2df66f3c9d3f4b306cec2d8ecf88 2021.0 50276 50276 \n", - "52 3e35f464fafa37874c5479eba6a8de39 2021.0 50280 50280 \n", - "75 10cea1f86819cd03704ddad23d6ed6b2 2021.0 50281 50281 \n", - "14 7d5e5c112a450de2882acc863ffbf347 2021.0 50289 50289 \n", - "54 46e1f9e708e4bdc8dfc3b8b11b12c58f 2021.0 50305 50305 \n", - "68 e62a7d09f50c6891a7f7b49711ceded0 2021.0 50307 50307 \n", - "94 74f575bf059841b71a869fd3efd66f4f 2021.0 50308 50308 \n", - "10 e6710aa2c700355db16170c3b148a71d 2021.0 50324 50324 \n", - "37 ea5f5035fe8120cb93533e55883674df 2021.0 50330 50330 
\n", - "41 8af031820471773675f7d78ef0ffbb04 2021.0 50347 50347 \n", - "82 3f28a1bf0e4b0718de390209ebd536bb 2021.0 50361 50361 \n", - "11 8cff92c1747d39b61233f3134a9b2f50 2021.0 50365 50365 \n", - "76 c3620a86511824ab2f1dbc6041261419 2021.0 50387 50387 \n", - "26 68cce0722e9a67b873de8d28850aa9aa 2021.0 50389 50389 \n", - "59 33a64f3c79c604d28a89374170b0700a 2021.0 50392 50392 \n", - "0 eeae713c46c405e3c8d4f001df04ab08 2021.0 50399 50399 \n", - "42 0959996691b94d855104db0d33b2e1c1 2021.0 50403 50403 \n", - "15 019f5b9e0cc9b402ee30dfbc52e70889 2021.0 50422 50422 \n", - "38 95e83cbe96f8b149cf091a688e4efd7e 2021.0 50441 50441 \n", - "74 3e9bd5cfa85e2e200430e73cd794e4eb 2021.0 50444 50444 \n", - "7 e0b549ebd3208c1adf7e8daff9b02e56 2021.0 50454 50454 \n", - "84 518ace07cb87072ec541dc7fef24e6f7 2021.0 50468 50468 \n", - "87 588365cb83b4b249b502b797bf217299 2021.0 50483 50483 \n", - "60 c15d9675ebd9e79a1e9b21ce171c907c 2021.0 50484 50484 \n", - "47 ae316730728efa5b437e71f8a123e510 2021.0 50499 50499 \n", - "77 507c162cbce391ef4ac5db7e7e420b0d 2021.0 60127 60127 \n", - "4 e60a5ce844c6fc0427a182ccb89190a4 2021.0 60191 60191 \n", - "43 6078b40173c8a29881a4e217181547d6 2021.0 66344 66344 \n", - "29 76f5a9389da3eb3cf3060057c880c3f6 2021.0 77086 77086 \n", - "27 64c2d7f92534e463125d4f208bf04f04 2021.0 80118 80118 \n", - "64 783e80db49b656ed4e4eb77c4c25e974 2021.0 80127 80127 \n", - "\n", - " reported_by_name_y \\\n", - "61 NaN \n", - "56 Maryland Department of Transportation \n", - "24 NaN \n", - "30 NaN \n", - "63 NaN \n", - "2 Mississippi Department of Transportation \n", - "40 NaN \n", - "101 Indiana Department of Transportation \n", - "25 Indiana Department of Transportation \n", - "49 Indiana Department of Transportation \n", - "96 Indiana Department of Transportation \n", - "52 Indiana Department of Transportation \n", - "75 Indiana Department of Transportation \n", - "14 Indiana Department of Transportation \n", - "54 Indiana Department of Transportation \n", - "68 Indiana 
Department of Transportation \n", - "94 Indiana Department of Transportation \n", - "10 Indiana Department of Transportation \n", - "37 Indiana Department of Transportation \n", - "41 Indiana Department of Transportation \n", - "82 Indiana Department of Transportation \n", - "11 Indiana Department of Transportation \n", - "76 Indiana Department of Transportation \n", - "26 Indiana Department of Transportation \n", - "59 Indiana Department of Transportation \n", - "0 Indiana Department of Transportation \n", - "42 Indiana Department of Transportation \n", - "15 Indiana Department of Transportation \n", - "38 Indiana Department of Transportation \n", - "74 Indiana Department of Transportation \n", - "7 Indiana Department of Transportation \n", - "84 Indiana Department of Transportation \n", - "87 Indiana Department of Transportation \n", - "60 Indiana Department of Transportation \n", - "47 Indiana Department of Transportation \n", - "77 Louisiana Department of Transportation \n", - "4 Oklahoma Department of Transportation \n", - "43 NaN \n", - "29 Missouri Department of Transportation \n", - "27 Colorado Department of Transportation \n", - "64 Wyoming Department of Transportation \n", - "\n", - " agency_name_y city_y \\\n", - "61 Bay State LLC Boston \n", - "56 Baltimore County Department of Public Works Tr... Towson \n", - "24 Puerto Rico Highway and Transportation Authori... 
San Juan \n", - "30 Municipality of Añasco Anasco \n", - "63 Coweta County Newnan \n", - "2 Central Mississippi, Incorporated Winona \n", - "40 Michigan Department of Transportation NaN \n", - "101 Boone County Senior Services Lebanon \n", - "25 Marshall County Council on Aging Plymouth \n", - "49 Steuben County Council on Aging Angola \n", - "96 Four Rivers Resource Services Loogootee \n", - "52 Rush County Senior Services Rushville \n", - "75 Cass County Council on Aging Logansport \n", - "14 Cardinal Services Warsaw \n", - "54 LaGrange County Council on Aging Lagrange \n", - "68 LifeStream Services Yorktown \n", - "94 Area 10 Council on Aging of Monroe County Ellettsville \n", - "10 LifeTime Resources Dillsboro \n", - "37 DeKalb County Council on Aging Auburn \n", - "41 Blue River Services Corydon \n", - "82 Huntington County Council on Aging Huntington \n", - "11 Area IV Agency on Aging and Community Action P... Lafayette \n", - "76 Union County Council on Aging Liberty \n", - "26 Orange County First Chance Center Paoli \n", - "59 YMCA of Vincennes Vincennes \n", - "0 Fayette County Council Agency on Aging and Aged Connersville \n", - "42 Wells County Council on Aging Bluffton \n", - "15 Noble County Council on Aging Albion \n", - "38 Living Wells in Wabash County Council on Aging Wabash \n", - "74 White County Council on Aging Monticello \n", - "7 Miami County YMCA Peru \n", - "84 Whitley County Council on Aging Columbia City \n", - "87 Franklin County Senior Services Brookville \n", - "60 Fulton County Council on Aging Rochester \n", - "47 Paul Philippe Resource Center Frankfort \n", - "77 Plaquemines Parish Government Belle Chasse \n", - "4 LIFT COMMUNITY ACTION AGENCY INC LIFT Hugo \n", - "43 Northeast Transportation Services Fort Worth \n", - "29 New Bourbon Regional Port Authority Perryville \n", - "27 Wet Mountain Valley Rotary Community Service, ... 
Westcliffe \n", - "64 Shoshoni Senior Citizens Center Shoshoni \n", - "\n", - " key_y year_y _merge \n", - "61 0d5ba39814c8d079274b8a9f0cc57094 2022.0 both \n", - "56 82625c40cb5f233003bd99bf662c0f8c 2022.0 both \n", - "24 671fb05927de6f93c0b7fbc231ba0c3f 2022.0 both \n", - "30 bf314c9d218b6b33879aca0e15a0aec6 2022.0 both \n", - "63 dd0a411799e522f52608bb1977d5a149 2022.0 both \n", - "2 b1c19267d796b92f9da7b0491ac539e7 2022.0 both \n", - "40 5db618a2d5318c4dff590a53f339688a 2022.0 both \n", - "101 51057c0cb82672b5f5c59afd1e9217ef 2022.0 both \n", - "25 67f53fd56b632e5a1edbcbc9beb819cb 2022.0 both \n", - "49 68c0951ae10e643259fbad16a68b0501 2022.0 both \n", - "96 e8c4337e4072dcff70836d5aa72d6d0e 2022.0 both \n", - "52 d3ffb9b7a07a22227815c8c850384777 2022.0 both \n", - "75 021a9930cc37edb3d2b04e0f47c0518f 2022.0 both \n", - "14 4e91b967ea275a6f3bd9d748e0755299 2022.0 both \n", - "54 06013a9919f57f65e7e19604d97fd876 2022.0 both \n", - "68 24ad33959cbe7492784bed1b19148448 2022.0 both \n", - "94 00d4f9e499a70f66a8a72c0f4c42d408 2022.0 both \n", - "10 8b9f8b88b29442184defa14305fd5cbf 2022.0 both \n", - "37 f86b7873b1bf7f66462e1126e2e6cba8 2022.0 both \n", - "41 5c6ef7763f8d3c56e2116e821a31f869 2022.0 both \n", - "82 edbf0f943976f37a77cff85b99004094 2022.0 both \n", - "11 a561a8b8df6a37f2dd4f141e4902d346 2022.0 both \n", - "76 e5314f96a7b7c1c48612fa62b2f88a7c 2022.0 both \n", - "26 7ac967575d36aa7c14596cfd5dc679c7 2022.0 both \n", - "59 13d170d217ad0cf7860e62ce13651004 2022.0 both \n", - "0 4e3cd53439c5b16a8db255353ab5f452 2022.0 both \n", - "42 0686e12cdf0e5053047e63ff96e7c83b 2022.0 both \n", - "15 5ef553988ed5ffe7abf562f9700c5e68 2022.0 both \n", - "38 2547e2ebe22ddcb01ae9adedd3d38a4b 2022.0 both \n", - "74 91d0c6fd3810f8971287135ca843990d 2022.0 both \n", - "7 8a1db7fdef4bd99effcad121ce960a7a 2022.0 both \n", - "84 0b058ae9f5d5a1e9780257c981a6b91c 2022.0 both \n", - "87 b76b01f10dcbcd6616d05ff50d86b353 2022.0 both \n", - "60 938d6b2a2c0297e4b2d77ddece412a5d 2022.0 
both \n", - "47 0ea094bd3b3b713d00eb383fec67b849 2022.0 both \n", - "77 68bcb32f91c2e25018dfff92f2f3739a 2022.0 both \n", - "4 3bfbdeaa0429bd8cd0fddc2f59892661 2022.0 both \n", - "43 d7f9a702076e56039e38b4deb3798b49 2022.0 both \n", - "29 66ac0917e90aea3a1df01b4303967ec3 2022.0 both \n", - "27 a2a83e9259a69aec792f7b67e3292d06 2022.0 both \n", - "64 a3674c39d8e7ff4026b491956dc20193 2022.0 both " - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m3[m3._merge==\"both\"].sort_values(\"ntd_id_no_prefix\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92906541-2a8f-46aa-aae0-7bc60d892fe1", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 429da9c388dc5eded38ad6f5f491b1c80e48ce0d Mon Sep 17 00:00:00 2001 From: Shweta Adhikari Date: Fri, 19 Jan 2024 01:58:21 +0000 Subject: [PATCH 4/4] ntd_2021_2022crosswalk --- ntd/README.md | 14 +++- ntd/ntd_id_changes_crosswalk.ipynb | 103 +++++++++++++++++------------ 2 files changed, 75 insertions(+), 42 deletions(-) diff --git a/ntd/README.md b/ntd/README.md index 873d76444..b1db7081c 100644 --- a/ntd/README.md +++ b/ntd/README.md @@ -7,4 +7,16 @@ This report shows general ridership trends by transit agency, mode, and type of ## Datasets 1. NTD monthly data: https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release. 2. [RTPA list](https://gis.data.ca.gov/datasets/CAEnergy::regional-transportation-planning-agencies/explore?appid=cf412a17daaa47bca93c6d6b7e77aff0&edit=true) -3. 
Download our processed full data [here](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis). \ No newline at end of file +3. Download our processed full data [here](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis). + + +# NTD ID Changes 2021_2022 Crosswalk + +NTD IDs have changed for some agencies from 2021 to 2022 i.e. NTD Id is no longer unique at the year level. + +For the NTDs ID that have changed we have mapped to the historical data. + +## Datasets +1. NTD 2021_2022 raw data: (gs://calitp-analytics-data/data-analyses/ntd/ntd_2021_2022.csv) +2. Data that required manual crosswalk: (gs://calitp-analytics-data/data-analyses/ntd/manual.csv) +3. Download our final data that has crosswalk as well as additional rem diff --git a/ntd/ntd_id_changes_crosswalk.ipynb b/ntd/ntd_id_changes_crosswalk.ipynb index 56b8655f1..5a97bcc7d 100644 --- a/ntd/ntd_id_changes_crosswalk.ipynb +++ b/ntd/ntd_id_changes_crosswalk.ipynb @@ -24,11 +24,10 @@ "import numpy as np\n", "import pandas as pd\n", "\n", - "#GCS_BUCKET = \"gs://calitp-ntd-data-products\"\n", - "#GCS_PATH = (f\"{GCS_BUCKET}annual-database-agency-information/\"\n", - "# \"dt=2023-11-15/ts=2023-11-15T22:29:51.925030+00:00/year=2022/\"\n", - "# \"annual-database-agency-information.jsonl.gz\"\n", - "# )\n", + "#GCS_BUCKET = \"gs://calitp-analytics-data/data-analyses/ntd\"\n", + "GCS_PATH = \"gs://calitp-analytics-data/data-analyses/ntd/\"\n", + "FILE_NAMEa = \"ntd_2021_2022.csv\"\n", + "FILE_NAMEb = \"manual.csv\"\n", "\n", "LOCAL_PATH = \"ntd_2021_2022.csv\"" ] @@ -38,9 +37,19 @@ "execution_count": 2, "id": "85b4ef76-811d-499e-94c1-166c9cd4b0d5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/google/auth/_default.py:78: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. 
You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + } + ], "source": [ - "df_full = pd.read_csv(LOCAL_PATH)\n", + "df_full = pd.read_csv(\n", + " f\"{GCS_PATH}{FILE_NAMEa}\")\n", "\n", "df_2021 = df_full[df_full.year==2021].reset_index(drop=True)\n", "df_2022 = df_full[df_full.year==2022].reset_index(drop=True)" @@ -996,7 +1005,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 16, "id": "fa780a5a-81d0-4953-b7ef-b83312fedd75", "metadata": {}, "outputs": [ @@ -1105,7 +1114,7 @@ "1 NaN NaN left_only " ] }, - "execution_count": 82, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1130,7 +1139,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 17, "id": "92906541-2a8f-46aa-aae0-7bc60d892fe1", "metadata": {}, "outputs": [], @@ -1140,7 +1149,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 18, "id": "b18cdc3d-cedf-43c4-bbf5-e084e24d5a3b", "metadata": {}, "outputs": [ @@ -1296,7 +1305,7 @@ "[1130 rows x 5 columns]" ] }, - "execution_count": 30, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1315,7 +1324,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 19, "id": "74bd172e-2585-4e9b-9bde-c3de85ef05e1", "metadata": {}, "outputs": [], @@ -1325,7 +1334,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 20, "id": "7c82c4cc-d32f-4803-a487-f8cda9f41db0", "metadata": {}, "outputs": [], @@ -1335,7 +1344,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 21, "id": "9962b97f-8254-422d-a7aa-38e9f22cb391", "metadata": {}, "outputs": [ @@ -1516,7 +1525,7 @@ "[1130 rows x 6 columns]" ] }, - "execution_count": 35, + "execution_count": 21, "metadata": {}, "output_type": 
"execute_result" } @@ -1535,7 +1544,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 22, "id": "7d30483d-47e9-480b-98c4-50e6143db933", "metadata": {}, "outputs": [], @@ -1545,7 +1554,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 23, "id": "cf74a45d-d6cb-4c65-a2fc-ccab5a218b0a", "metadata": {}, "outputs": [], @@ -1555,7 +1564,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 24, "id": "e28ec20c-2c54-4597-80bd-f2809f6bde0c", "metadata": {}, "outputs": [], @@ -1566,7 +1575,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 25, "id": "7e4cc94d-37aa-4dee-ab80-fbb7e29ae68c", "metadata": {}, "outputs": [], @@ -1576,7 +1585,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 26, "id": "e8633710-afc4-4b82-a93a-edddf3c97ef5", "metadata": {}, "outputs": [], @@ -1586,7 +1595,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 27, "id": "de589814-3e55-459a-81e8-e4403960e788", "metadata": {}, "outputs": [], @@ -1604,7 +1613,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 28, "id": "4001a4eb-44c6-4155-bd4c-51b5042c10b6", "metadata": {}, "outputs": [], @@ -1614,7 +1623,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 29, "id": "09c4ca75-7432-47ad-bac8-f7967f083f20", "metadata": {}, "outputs": [ @@ -1795,7 +1804,7 @@ "[2960 rows x 6 columns]" ] }, - "execution_count": 63, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1814,7 +1823,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 30, "id": "62dd8c51-e1a5-4ada-8e48-3b5245500895", "metadata": {}, "outputs": [], @@ -1824,7 +1833,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 31, "id": "9d5cea88-0a30-49b7-b5a4-c1fa796c886f", "metadata": {}, "outputs": [ @@ -1846,7 +1855,7 @@ }, { "cell_type": "code", - "execution_count": 76, + 
"execution_count": 32, "id": "49cfc12a-9cca-4639-91cc-2107286634b5", "metadata": {}, "outputs": [], @@ -1856,7 +1865,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 33, "id": "9bb49836-cbf2-446a-88c2-a4368b907789", "metadata": {}, "outputs": [ @@ -1895,18 +1904,18 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 34, "id": "334fba5f-30ea-48aa-88cf-a728c517810d", "metadata": {}, "outputs": [], "source": [ - "LOCAL_PATHa = \"manual_crosswalk.csv\"\n", - "m4_table = pd.read_csv(LOCAL_PATHa)" + "m4_table = pd.read_csv(\n", + " f\"{GCS_PATH}{FILE_NAMEb}\")" ] }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 35, "id": "89d9788b-95f2-46ec-a458-1b7c6a18c97b", "metadata": {}, "outputs": [ @@ -1979,7 +1988,7 @@ "1 the county doesn’t have data after 2021 so cou... " ] }, - "execution_count": 106, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1990,7 +1999,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 36, "id": "c39ad008-3c93-48e2-9f79-06ea57cc8a42", "metadata": {}, "outputs": [], @@ -2000,7 +2009,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 37, "id": "444199a9-4ee5-4dc4-9078-372bc073f8bf", "metadata": {}, "outputs": [ @@ -2069,7 +2078,7 @@ "2 Whatcom Transportation Authority Bellingham 21 NaN " ] }, - "execution_count": 108, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -2088,7 +2097,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 38, "id": "58fcd1e0-0940-4456-9ef8-eab0be5f9d9c", "metadata": {}, "outputs": [], @@ -2098,7 +2107,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 39, "id": "67689071-abb7-4225-8c83-fac5ea275960", "metadata": {}, "outputs": [ @@ -2315,7 +2324,7 @@ "[3012 rows x 9 columns]" ] }, - "execution_count": 110, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -2326,8 
+2335,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "dcb4a61d-d7f2-49f2-98f9-04bb1260b2c7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "final_crosswalk.to_csv(f\"{GCS_PATH}final_crosswalk.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2470aac7-5f05-4f9a-a988-8bcca4e53d80", "metadata": {}, "outputs": [], "source": []