From 8f3c66bd498191462f295d845143c3e1d517f1e2 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Sat, 4 Nov 2023 00:20:37 +0000 Subject: [PATCH 1/4] start readme and scripts --- conveyal_update/README.md | 15 + conveyal_update/check_download_feeds.ipynb | 1583 +++++++------------- conveyal_update/conveyal_vars.py | 10 + 3 files changed, 594 insertions(+), 1014 deletions(-) create mode 100644 conveyal_update/README.md create mode 100644 conveyal_update/conveyal_vars.py diff --git a/conveyal_update/README.md b/conveyal_update/README.md new file mode 100644 index 000000000..dd5f74b69 --- /dev/null +++ b/conveyal_update/README.md @@ -0,0 +1,15 @@ +# Updating GTFS/Network Bundles in Conveyal Analysis + +## General Notes + +* Conveyal is set up to ingest individual GTFS feeds (zipped feeds consisting of textfiles), while our warehouse extracts and transforms these. Potential approaches are: + * synthesize something that looks like a GTFS feed from our warehouse data (not attempted here) + * use our warehouse as a guide, but download and supply the raw individual feeds from when we archived them in GCS (this approach) + +## Scripts + +* Set target date in `conveyal_vars.py`. Region boundaries are also set here, but these should remain static unless the decision is made to use entirely different regions in Conveyal. Target date should be a mid-week day. +* TODO `evaluate_feeds.py` includes functions to check to see which feeds have service defined on the target date, and show feeds without any apparent service, including if that service is apparently captured in another feed. This helps check for potential coverage gaps, likely due to GTFS feed expirations and/or the [publishing future service issue](https://github.com/MobilityData/GTFS_Schedule_Best-Practices/issues/48(https://github.com/MobilityData/GTFS_Schedule_Best-Practices/issues/48). 
You may have to shift the target date around to find the best overall coverage, and/or manually edit important but missing feeds to define service if reasonable. +* TODO `match_feeds_regions.py` +* TODO `download_data.py` +* TODO `run_and_handoff.py` runs the above scripts, and additionally generates handoff outputs that can be used to download, crop, and filter OSM data for each region using... \ No newline at end of file diff --git a/conveyal_update/check_download_feeds.ipynb b/conveyal_update/check_download_feeds.ipynb index f773f0961..9d045a2c8 100644 --- a/conveyal_update/check_download_feeds.ipynb +++ b/conveyal_update/check_download_feeds.ipynb @@ -2,19 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "c66b1132-56b5-4b11-9318-b8f268ecab30", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.9/site-packages/geopandas/_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). 
Conversions between both will be slow.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", @@ -33,7 +24,9 @@ "from shared_utils.geography_utils import WGS84, CA_NAD83Albers\n", "\n", "from tqdm.notebook import tqdm\n", - "tqdm.pandas()" + "tqdm.pandas()\n", + "\n", + "import conveyal_vars" ] }, { @@ -52,17 +45,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, "id": "296bfae5-a830-4b35-a202-87f0c3143c83", "metadata": {}, "outputs": [], "source": [ - "target_date = dt.date(2023, 9, 13)" + "target_date = conveyal_vars.target_date" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 31, "id": "f73da205-2eea-4acf-a649-c1f1e2441ac3", "metadata": {}, "outputs": [], @@ -73,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 32, "id": "d2119c99-4e57-4896-909e-4fc0e7c420f7", "metadata": {}, "outputs": [ @@ -105,50 +98,50 @@ " base64_url\n", " gtfs_dataset_key\n", " gtfs_dataset_name\n", - " type\n", - " regional_feed_type\n", " name\n", + " regional_feed_type\n", + " type\n", " \n", " \n", " \n", " \n", " 0\n", - " 23f051077b6cd93dd69c310715d0163c\n", - " 2023-09-13\n", - " 2d1d563676bfb98dd46384e08917b54e\n", + " ba5a81692fc0dd50972e84502fc529cc\n", + " 2023-10-18\n", + " 132d2fed3f191ebe86e3de2c7cd31a4a\n", " America/Los_Angeles\n", " aHR0cHM6Ly90Y3J0YS50cmlwc2hvdC5jb20vdjEvZ3Rmcy...\n", " 0139b1253130b33adcd4b3a4490530d2\n", " TCRTA TripShot Schedule\n", - " schedule\n", - " None\n", " TCRTA TripShot Schedule\n", + " None\n", + " schedule\n", " \n", " \n", " 1\n", - " 8098185cd85182fabca8ed44a2bb468f\n", - " 2023-09-13\n", - " 4f186e9c17acba5c1129db8a5c9b5ec6\n", + " 01d44336e6421ae3cc2be5a0d68a3e4f\n", + " 2023-10-18\n", + " 5ad0314c752ed78142d6ebbf7e63f922\n", " America/Los_Angeles\n", - " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...\n", - " 015d67d5b75b5cf2b710bbadadfb75f5\n", - " Bay Area 
511 Marin Schedule\n", + " aHR0cHM6Ly9hcHAubWVjYXRyYW4uY29tL3VyYi93cy9mZW...\n", + " 014d0998350083249a9eb310635548c2\n", + " SLO Schedule\n", + " SLO Schedule\n", + " None\n", " schedule\n", - " Regional Subfeed\n", - " Bay Area 511 Marin Schedule\n", " \n", " \n", " 2\n", - " 40d243c55619eb784bdfc012d9ac5463\n", - " 2023-09-13\n", - " 0b53dbabeda04060bbe7c94e21b28a79\n", + " e35ed9aa390c162b7039aefe2df4208e\n", + " 2023-10-18\n", + " 43bda252fd929bf57f18a19b780ec33b\n", " America/Los_Angeles\n", " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...\n", - " 04d1db905ac689e17a97ce414cf393a6\n", - " Bay Area 511 Angel Island-Tiburon Ferry Schedule\n", - " schedule\n", + " 015d67d5b75b5cf2b710bbadadfb75f5\n", + " Bay Area 511 Marin Schedule\n", + " Bay Area 511 Marin Schedule\n", " Regional Subfeed\n", - " Bay Area 511 Angel Island-Tiburon Ferry Schedule\n", + " schedule\n", " \n", " \n", "\n", @@ -156,37 +149,32 @@ ], "text/plain": [ " key date \\\n", - "0 23f051077b6cd93dd69c310715d0163c 2023-09-13 \n", - "1 8098185cd85182fabca8ed44a2bb468f 2023-09-13 \n", - "2 40d243c55619eb784bdfc012d9ac5463 2023-09-13 \n", + "0 ba5a81692fc0dd50972e84502fc529cc 2023-10-18 \n", + "1 01d44336e6421ae3cc2be5a0d68a3e4f 2023-10-18 \n", + "2 e35ed9aa390c162b7039aefe2df4208e 2023-10-18 \n", "\n", " feed_key feed_timezone \\\n", - "0 2d1d563676bfb98dd46384e08917b54e America/Los_Angeles \n", - "1 4f186e9c17acba5c1129db8a5c9b5ec6 America/Los_Angeles \n", - "2 0b53dbabeda04060bbe7c94e21b28a79 America/Los_Angeles \n", + "0 132d2fed3f191ebe86e3de2c7cd31a4a America/Los_Angeles \n", + "1 5ad0314c752ed78142d6ebbf7e63f922 America/Los_Angeles \n", + "2 43bda252fd929bf57f18a19b780ec33b America/Los_Angeles \n", "\n", " base64_url \\\n", "0 aHR0cHM6Ly90Y3J0YS50cmlwc2hvdC5jb20vdjEvZ3Rmcy... \n", - "1 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... \n", + "1 aHR0cHM6Ly9hcHAubWVjYXRyYW4uY29tL3VyYi93cy9mZW... \n", "2 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... 
\n", "\n", - " gtfs_dataset_key \\\n", - "0 0139b1253130b33adcd4b3a4490530d2 \n", - "1 015d67d5b75b5cf2b710bbadadfb75f5 \n", - "2 04d1db905ac689e17a97ce414cf393a6 \n", - "\n", - " gtfs_dataset_name type \\\n", - "0 TCRTA TripShot Schedule schedule \n", - "1 Bay Area 511 Marin Schedule schedule \n", - "2 Bay Area 511 Angel Island-Tiburon Ferry Schedule schedule \n", + " gtfs_dataset_key gtfs_dataset_name \\\n", + "0 0139b1253130b33adcd4b3a4490530d2 TCRTA TripShot Schedule \n", + "1 014d0998350083249a9eb310635548c2 SLO Schedule \n", + "2 015d67d5b75b5cf2b710bbadadfb75f5 Bay Area 511 Marin Schedule \n", "\n", - " regional_feed_type name \n", - "0 None TCRTA TripShot Schedule \n", - "1 Regional Subfeed Bay Area 511 Marin Schedule \n", - "2 Regional Subfeed Bay Area 511 Angel Island-Tiburon Ferry Schedule " + " name regional_feed_type type \n", + "0 TCRTA TripShot Schedule None schedule \n", + "1 SLO Schedule None schedule \n", + "2 Bay Area 511 Marin Schedule Regional Subfeed schedule " ] }, - "execution_count": 4, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -197,33 +185,428 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "467b6dbb-6c7c-4a8e-8a34-e5d2f478524a", + "execution_count": 33, + "id": "99a28abf-f771-4873-861c-e613a07d099e", + "metadata": {}, + "outputs": [], + "source": [ + "operator_feeds = feeds_on_target.feed_key" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "53732ff0-4ba5-4b2b-bfb4-0d2f4a61de55", + "metadata": {}, + "outputs": [], + "source": [ + "trips = (\n", + " tbls.mart_gtfs.fct_scheduled_trips()\n", + " >> filter(_.feed_key.isin(operator_feeds), _.service_date == target_date)\n", + " >> group_by(_.feed_key)\n", + " >> count(_.feed_key)\n", + " # >> collect()\n", + " # >> mutate(any_trip = True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "b3de2066-875f-4729-a935-3dc4b0988e31", + "metadata": {}, + "outputs": [], + "source": [ + "service_defined = 
trips >> collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "82b1cd8d-eae8-4014-be31-6830e83f0734", + "metadata": {}, + "outputs": [], + "source": [ + "feeds_on_target = (feeds_on_target >> left_join(_, service_defined, on = 'feed_key')\n", + " >> select(-_.name)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "f8434c0f-4699-422f-95ae-27d6b6e2dffd", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keydatefeed_keyfeed_timezonebase64_urlgtfs_dataset_keygtfs_dataset_nameregional_feed_typetypen
53c927bb3d92c13a63a7900caa77f4bee02023-10-18a9a4672431e928089176517c3297db66America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...4a9e6a8b8db445bc9fc3cf398ded67b1Glendora ScheduleNonescheduleNaN
55fd77340aba25ef0767ecebcba3f0d0de2023-10-18f561f3f554f4ef3f22121116be67b2f0US/PacificaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...4b84ffbdc2b6abb171a5df6ce8f06797Wasco DAR ScheduleNonescheduleNaN
91bdd83751c48788bf23a2a9c571bd0d5e2023-10-1854c22d251df6fdaf2b3c1d699f4a739bAmerica/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...73e1cb24210dab4feb13fbf2924939d2Maywood ScheduleNonescheduleNaN
99a1647ad5f590c79d0b5f185b098560642023-10-18e4af9a8cc80c88b868f066824d992d9fAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...82a0ba5e020f951f6e780761537ef12bStanislaus FlexNonescheduleNaN
101fcb8bd67a2f3bfdac5d819feb5b14f042023-10-1807feb14721d9fe332a8e7fb37bf625ddAmerica/New_YorkaHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw85e9d75e8430f242e9f5600d2f5c6964G Trans ScheduleNonescheduleNaN
103f4e59cc5e52efaa8fad075e7aa9d693e2023-10-18a43c08f7ed73ee9e88204a260933b461America/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...88780135c261b5b1391afdd5d562e5baTracy ScheduleNonescheduleNaN
1092353fa44db8fac09e9b2a994eb5799532023-10-186cf65a84397884d0ffb44db5b8a08bd4America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...8e3f49cfd30b44746a0724ca60e596caInglewood ScheduleNonescheduleNaN
12202ddc9b2168f50ae0bdce2fa101ce9582023-10-18e68cd2ffe7bb1760d28f3a94e3a31dacAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...9ca0bbfd2ada3d686f1c3a136c21eafcSan Juan Capistrano ScheduleNonescheduleNaN
139b9b6949a4dd7548ea9733d62099fd0fa2023-10-18696beb3cb2e375f8524ae18eff0d041dAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...b62b4267caca504e1057c52a91611c16Eastern Sierra FlexNonescheduleNaN
1542a6176818feb911d0d444d7268594cc32023-10-18062563b11ac99ddec6d3bec6f613b78dAmerica/Los_AngelesaHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym...c599bcb93f9c039473932479eb082d7dMorro Bay ScheduleNonescheduleNaN
16294e92e15f61f2e04ba18e4d54e98bb9b2023-10-181b77ef49f5bc70038cbf15e4f5f98477America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...cd08875e7d95ed218f98b7694ac8ea3fCompton ScheduleNonescheduleNaN
1636c486bf9f5ff3c0fbceb3c091f8ae0092023-10-18fe662c95bddfb6e5fd75cb0afbb85cd8America/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...cde2b7a63ab7bb33141c8b02b001ea0fTCAT FlexNonescheduleNaN
17137f0266a8efb77b896d7dc17b0d154832023-10-18e055f64de6bf34b0d31d67b6d6e31dfbAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...da71c7121e987c3cd333d48ee24e23efRedwood Coast FlexNonescheduleNaN
172cac869872a0a2cc693f3fe371dbef0642023-10-18d95f2f26bbf4846e4eb84d352fb0990dAmerica/Los_AngelesaHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW...da7e9e09d3eec6c7686adc21c8b28b63Beach Cities ScheduleNonescheduleNaN
19198a8b1b5e7cb731c206dfc8eb105b4d72023-10-18d5b1960f462b2798ad3d2bbd7a77a01eAmerica/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...f61fbdf46f6ea735259b9d0c48139ebeArtesia ScheduleNonescheduleNaN
20052f018f92d2bc0d86e60b4e3fa5cea482023-10-18d1fd5c603d131faf826423a8f00aa7acAmerica/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...ff51495a24cf286ff2f7bc9b3401f855Blossom Express ScheduleNonescheduleNaN
\n", + "
" + ], "text/plain": [ - "(201, 10)" + " key date \\\n", + "53 c927bb3d92c13a63a7900caa77f4bee0 2023-10-18 \n", + "55 fd77340aba25ef0767ecebcba3f0d0de 2023-10-18 \n", + "91 bdd83751c48788bf23a2a9c571bd0d5e 2023-10-18 \n", + "99 a1647ad5f590c79d0b5f185b09856064 2023-10-18 \n", + "101 fcb8bd67a2f3bfdac5d819feb5b14f04 2023-10-18 \n", + "103 f4e59cc5e52efaa8fad075e7aa9d693e 2023-10-18 \n", + "109 2353fa44db8fac09e9b2a994eb579953 2023-10-18 \n", + "122 02ddc9b2168f50ae0bdce2fa101ce958 2023-10-18 \n", + "139 b9b6949a4dd7548ea9733d62099fd0fa 2023-10-18 \n", + "154 2a6176818feb911d0d444d7268594cc3 2023-10-18 \n", + "162 94e92e15f61f2e04ba18e4d54e98bb9b 2023-10-18 \n", + "163 6c486bf9f5ff3c0fbceb3c091f8ae009 2023-10-18 \n", + "171 37f0266a8efb77b896d7dc17b0d15483 2023-10-18 \n", + "172 cac869872a0a2cc693f3fe371dbef064 2023-10-18 \n", + "191 98a8b1b5e7cb731c206dfc8eb105b4d7 2023-10-18 \n", + "200 52f018f92d2bc0d86e60b4e3fa5cea48 2023-10-18 \n", + "\n", + " feed_key feed_timezone \\\n", + "53 a9a4672431e928089176517c3297db66 America/Los_Angeles \n", + "55 f561f3f554f4ef3f22121116be67b2f0 US/Pacific \n", + "91 54c22d251df6fdaf2b3c1d699f4a739b America/Los_Angeles \n", + "99 e4af9a8cc80c88b868f066824d992d9f America/Los_Angeles \n", + "101 07feb14721d9fe332a8e7fb37bf625dd America/New_York \n", + "103 a43c08f7ed73ee9e88204a260933b461 America/Los_Angeles \n", + "109 6cf65a84397884d0ffb44db5b8a08bd4 America/Los_Angeles \n", + "122 e68cd2ffe7bb1760d28f3a94e3a31dac America/Los_Angeles \n", + "139 696beb3cb2e375f8524ae18eff0d041d America/Los_Angeles \n", + "154 062563b11ac99ddec6d3bec6f613b78d America/Los_Angeles \n", + "162 1b77ef49f5bc70038cbf15e4f5f98477 America/Los_Angeles \n", + "163 fe662c95bddfb6e5fd75cb0afbb85cd8 America/Los_Angeles \n", + "171 e055f64de6bf34b0d31d67b6d6e31dfb America/Los_Angeles \n", + "172 d95f2f26bbf4846e4eb84d352fb0990d America/Los_Angeles \n", + "191 d5b1960f462b2798ad3d2bbd7a77a01e America/Los_Angeles \n", + "200 d1fd5c603d131faf826423a8f00aa7ac 
America/Los_Angeles \n", + "\n", + " base64_url \\\n", + "53 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "55 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "91 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "99 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "101 aHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw \n", + "103 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", + "109 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "122 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "139 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "154 aHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym... \n", + "162 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "163 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "171 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "172 aHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW... \n", + "191 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "200 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... 
\n", + "\n", + " gtfs_dataset_key gtfs_dataset_name \\\n", + "53 4a9e6a8b8db445bc9fc3cf398ded67b1 Glendora Schedule \n", + "55 4b84ffbdc2b6abb171a5df6ce8f06797 Wasco DAR Schedule \n", + "91 73e1cb24210dab4feb13fbf2924939d2 Maywood Schedule \n", + "99 82a0ba5e020f951f6e780761537ef12b Stanislaus Flex \n", + "101 85e9d75e8430f242e9f5600d2f5c6964 G Trans Schedule \n", + "103 88780135c261b5b1391afdd5d562e5ba Tracy Schedule \n", + "109 8e3f49cfd30b44746a0724ca60e596ca Inglewood Schedule \n", + "122 9ca0bbfd2ada3d686f1c3a136c21eafc San Juan Capistrano Schedule \n", + "139 b62b4267caca504e1057c52a91611c16 Eastern Sierra Flex \n", + "154 c599bcb93f9c039473932479eb082d7d Morro Bay Schedule \n", + "162 cd08875e7d95ed218f98b7694ac8ea3f Compton Schedule \n", + "163 cde2b7a63ab7bb33141c8b02b001ea0f TCAT Flex \n", + "171 da71c7121e987c3cd333d48ee24e23ef Redwood Coast Flex \n", + "172 da7e9e09d3eec6c7686adc21c8b28b63 Beach Cities Schedule \n", + "191 f61fbdf46f6ea735259b9d0c48139ebe Artesia Schedule \n", + "200 ff51495a24cf286ff2f7bc9b3401f855 Blossom Express Schedule \n", + "\n", + " regional_feed_type type n \n", + "53 None schedule NaN \n", + "55 None schedule NaN \n", + "91 None schedule NaN \n", + "99 None schedule NaN \n", + "101 None schedule NaN \n", + "103 None schedule NaN \n", + "109 None schedule NaN \n", + "122 None schedule NaN \n", + "139 None schedule NaN \n", + "154 None schedule NaN \n", + "162 None schedule NaN \n", + "163 None schedule NaN \n", + "171 None schedule NaN \n", + "172 None schedule NaN \n", + "191 None schedule NaN \n", + "200 None schedule NaN " ] }, - "execution_count": 5, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "feeds_on_target.shape" + "# feeds without any service defined on target date\n", + "# TODO lookback/recursion?\n", + "# TODO column for \"service has service defined in another feed, ex. 
BCT -> GMV BCT\"\n", + "feeds_on_target >> filter(_.n.isna())" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "fbdae203-2f11-4c14-a5d4-2320d5844847", + "execution_count": 28, + "id": "467b6dbb-6c7c-4a8e-8a34-e5d2f478524a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(201, 11)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "metro_test = (feeds_on_target >> filter(_.gtfs_dataset_name.str.contains('LA Metro'))).base64_url.iloc[0]" + "feeds_on_target.shape" ] }, { @@ -238,22 +621,17 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "3cd49887-58bb-483f-8930-a4ab58688897", + "execution_count": 4, + "id": "ae9afaa4-64da-4b62-a22d-d60671278761", "metadata": {}, "outputs": [], "source": [ - "regions = {}\n", - "# Set bounds for northern california\n", - "regions['norcal'] = {'north': 42.03909, 'south': 39.07038, 'east': -119.60541, 'west': -124.49158}\n", - "regions['central'] = {'north': 39.64165, 'south': 35.87347, 'east': -117.53174, 'west': -123.83789}\n", - "regions['socal'] = {'north': 35.8935, 'south': 32.5005, 'east': -114.13121, 'west': -121.46759}\n", - "regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043}" + "regions = conveyal_vars.conveyal_regions" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "1f021355-1f40-4121-8a6d-32eeb4a1c52e", "metadata": {}, "outputs": [], @@ -263,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "87642e47-80eb-4a21-81c4-e41f467a7c8d", "metadata": {}, "outputs": [], @@ -275,7 +653,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "id": "328b7a62-e5b7-47b8-b0e2-21f971b8e9e0", "metadata": {}, "outputs": [], @@ -285,7 +663,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "id": "5b8d6fd6-6847-4121-a6c8-212a7d01f5b7", "metadata": 
{}, "outputs": [], @@ -295,7 +673,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "id": "4a3502e5-7240-4bb1-bf33-28c96c316c3a", "metadata": {}, "outputs": [ @@ -383,7 +761,7 @@ "3 [-118.38043, 34.89945, -114.59015, 37.81629] " ] }, - "execution_count": 12, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -394,17 +772,18 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 17, "id": "97d117b1-8835-473a-893b-ead256f4fba5", "metadata": {}, "outputs": [], "source": [ - "df['geometry'] = df.apply(lambda x: shapely.geometry.box(*x.bbox), axis = 1)" + "df['geometry'] = df.apply(lambda x: shapely.geometry.box(*x.bbox), axis = 1)\n", + "df = df >> select(-_.bbox)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 18, "id": "6b9402e0-24de-43c4-b04e-0c903a4657d6", "metadata": {}, "outputs": [], @@ -414,17 +793,31 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 19, "id": "01160e41-81f6-45b8-ae76-86041b8f478f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# region_gdf.explore()" + "region_gdf.explore()" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 71, "id": "98644c75-cd9c-4032-8406-2f4a14a6edfd", "metadata": {}, "outputs": [ @@ -443,7 +836,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 72, "id": "64a57fbd-26da-47d0-b549-254961afd70d", "metadata": {}, "outputs": [], @@ -453,17 +846,17 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 73, "id": "d1f999fc-9295-4478-85cf-d5cc6e0b5956", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(85555, 5)" + "(88593, 5)" ] }, - "execution_count": 18, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } @@ -474,7 +867,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 74, "id": "9a15d156-683f-4f15-8638-67054462baa0", "metadata": {}, "outputs": [], @@ -484,7 +877,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 75, "id": "306c2065-0c23-43a5-aa47-c2950615cccf", "metadata": {}, "outputs": [], @@ -496,7 +889,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 76, "id": "02847606-a347-4f48-827a-eca90ae45207", "metadata": {}, "outputs": [ @@ -545,11 +938,11 @@ " -124.49158\n", " [-124.49158, 39.07038, -119.60541, 42.03909]\n", " POLYGON ((34109.459 117232.679, 32737.676 4468...\n", - " 63790\n", - " 87d5bfe86ca395b5437686424e71e323\n", - " f7a2121f01eef823fb9018028a5c7f41\n", - " 2558793\n", - " Rough and Ready Hwy at Grubstake Trl\n", + " 52148\n", + " 64fa4388fc2492d469f6049e40c14f20\n", + " 7bd3d8c32eda4869c4d7f8bf2aec5bb0\n", + " db86bc0b-d3af-4163-a30a-6ece57770dbe\n", + " Colfax (Standlock Bottle Shop)\n", " \n", " \n", " 1\n", @@ -560,11 +953,11 @@ " -123.83789\n", " [-123.83789, 35.87347, -117.53174, 39.64165]\n", " POLYGON ((222589.608 -235276.505, 
211687.216 1...\n", - " 63790\n", - " 87d5bfe86ca395b5437686424e71e323\n", - " f7a2121f01eef823fb9018028a5c7f41\n", - " 2558793\n", - " Rough and Ready Hwy at Grubstake Trl\n", + " 52148\n", + " 64fa4388fc2492d469f6049e40c14f20\n", + " 7bd3d8c32eda4869c4d7f8bf2aec5bb0\n", + " db86bc0b-d3af-4163-a30a-6ece57770dbe\n", + " Colfax (Standlock Bottle Shop)\n", " \n", " \n", " 0\n", @@ -575,11 +968,11 @@ " -124.49158\n", " [-124.49158, 39.07038, -119.60541, 42.03909]\n", " POLYGON ((34109.459 117232.679, 32737.676 4468...\n", - " 63824\n", - " 7b2962ad52dc69eb89dc21bb5aa353d1\n", - " f7a2121f01eef823fb9018028a5c7f41\n", - " 2558785\n", - " Rough and Ready Hwy at Valley Rd\n", + " 29210\n", + " fabfb6cc39802b79ce2bb67e4fa3cd45\n", + " 102b5149d072a697aeda213b8b72811b\n", + " 4001\n", + " Colfax Depot\n", " \n", " \n", "\n", @@ -597,22 +990,22 @@ "0 [-124.49158, 39.07038, -119.60541, 42.03909] \n", "\n", " geometry index_right \\\n", - "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 63790 \n", - "1 POLYGON ((222589.608 -235276.505, 211687.216 1... 63790 \n", - "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 63824 \n", + "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 52148 \n", + "1 POLYGON ((222589.608 -235276.505, 211687.216 1... 52148 \n", + "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 
29210 \n", "\n", " key feed_key \\\n", - "0 87d5bfe86ca395b5437686424e71e323 f7a2121f01eef823fb9018028a5c7f41 \n", - "1 87d5bfe86ca395b5437686424e71e323 f7a2121f01eef823fb9018028a5c7f41 \n", - "0 7b2962ad52dc69eb89dc21bb5aa353d1 f7a2121f01eef823fb9018028a5c7f41 \n", + "0 64fa4388fc2492d469f6049e40c14f20 7bd3d8c32eda4869c4d7f8bf2aec5bb0 \n", + "1 64fa4388fc2492d469f6049e40c14f20 7bd3d8c32eda4869c4d7f8bf2aec5bb0 \n", + "0 fabfb6cc39802b79ce2bb67e4fa3cd45 102b5149d072a697aeda213b8b72811b \n", "\n", - " stop_id stop_name \n", - "0 2558793 Rough and Ready Hwy at Grubstake Trl \n", - "1 2558793 Rough and Ready Hwy at Grubstake Trl \n", - "0 2558785 Rough and Ready Hwy at Valley Rd " + " stop_id stop_name \n", + "0 db86bc0b-d3af-4163-a30a-6ece57770dbe Colfax (Standlock Bottle Shop) \n", + "1 db86bc0b-d3af-4163-a30a-6ece57770dbe Colfax (Standlock Bottle Shop) \n", + "0 4001 Colfax Depot " ] }, - "execution_count": 21, + "execution_count": 76, "metadata": {}, "output_type": "execute_result" } @@ -623,7 +1016,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 77, "id": "40afc409-7952-4a92-8f6c-0e4757d14b61", "metadata": {}, "outputs": [], @@ -633,17 +1026,17 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 78, "id": "85306aab-a9cf-4141-aa4b-5a4aa937e6ec", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(193, 2)" + "(197, 2)" ] }, - "execution_count": 23, + "execution_count": 78, "metadata": {}, "output_type": "execute_result" } @@ -654,7 +1047,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 79, "id": "83802555-c94e-4c19-a842-6c0e5d033790", "metadata": {}, "outputs": [], @@ -665,7 +1058,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 80, "id": "457f1e27-5c02-4a29-a524-fbc62984bf27", "metadata": {}, "outputs": [ @@ -701,44 +1094,44 @@ " \n", " 0\n", " norcal\n", - " f7a2121f01eef823fb9018028a5c7f41\n", - " Nevada County Schedule\n", - " 
aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...\n", - " 2023-09-13\n", + " 7bd3d8c32eda4869c4d7f8bf2aec5bb0\n", + " Flixbus Schedule\n", + " aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm...\n", + " 2023-10-18\n", " \n", " \n", " 1\n", " central\n", - " f7a2121f01eef823fb9018028a5c7f41\n", - " Nevada County Schedule\n", - " aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...\n", - " 2023-09-13\n", + " 7bd3d8c32eda4869c4d7f8bf2aec5bb0\n", + " Flixbus Schedule\n", + " aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm...\n", + " 2023-10-18\n", " \n", " \n", " 2\n", - " norcal\n", - " 1b2a9ad705c963c7b44931ffd2280e09\n", - " TART, North Lake Tahoe Schedule\n", - " aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...\n", - " 2023-09-13\n", + " socal\n", + " 7bd3d8c32eda4869c4d7f8bf2aec5bb0\n", + " Flixbus Schedule\n", + " aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm...\n", + " 2023-10-18\n", " \n", " \n", "\n", "" ], "text/plain": [ - " region feed_key gtfs_dataset_name \\\n", - "0 norcal f7a2121f01eef823fb9018028a5c7f41 Nevada County Schedule \n", - "1 central f7a2121f01eef823fb9018028a5c7f41 Nevada County Schedule \n", - "2 norcal 1b2a9ad705c963c7b44931ffd2280e09 TART, North Lake Tahoe Schedule \n", + " region feed_key gtfs_dataset_name \\\n", + "0 norcal 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", + "1 central 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", + "2 socal 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", "\n", " base64_url date \n", - "0 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... 2023-09-13 \n", - "1 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... 2023-09-13 \n", - "2 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... 2023-09-13 " + "0 aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm... 2023-10-18 \n", + "1 aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm... 2023-10-18 \n", + "2 aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm... 
2023-10-18 " ] }, - "execution_count": 25, + "execution_count": 80, "metadata": {}, "output_type": "execute_result" } @@ -1156,7 +1549,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 81, "id": "2befb3c9-36ff-4128-8561-31f2d7aea1cd", "metadata": {}, "outputs": [], @@ -1166,7 +1559,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 82, "id": "5b6d7cb9-8557-4f48-befb-8522efaea5df", "metadata": {}, "outputs": [ @@ -1176,7 +1569,7 @@ "dict_keys(['norcal', 'central', 'socal', 'mojave'])" ] }, - "execution_count": 33, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } @@ -1187,7 +1580,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 102, "id": "56df1e83-9887-4aaf-9fc3-4bf2ec794a0f", "metadata": {}, "outputs": [], @@ -1195,13 +1588,13 @@ "def download_feed(row):\n", " # need wildcard for file too -- not all are gtfs.zip!\n", " uri = f'gs://calitp-gtfs-schedule-raw-v2/schedule/dt={row.date.strftime(\"%Y-%m-%d\")}/*/base64_url={row.base64_url}/*.zip'\n", - " fs.get(uri, f'{row.path}/{row.feed_key}_gtfs.zip')\n", + " fs.get(uri, f'{row.path}/{row.gtfs_dataset_name.replace(\" \", \"_\")}_{row.feed_key}_gtfs.zip')\n", " # print(f'downloaded {row.path}/{row.feed_key}_gtfs.zip')" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 103, "id": "c88007b8-d075-402d-80af-a317e18a112e", "metadata": {}, "outputs": [], @@ -1218,7 +1611,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 104, "id": "492a57c5-d64f-4293-88fe-bd8febb3ef2d", "metadata": {}, "outputs": [ @@ -1251,7 +1644,7 @@ " \n", " 0\n", " central\n", - " 81\n", + " 82\n", " \n", " \n", " 1\n", @@ -1261,12 +1654,12 @@ " \n", " 2\n", " norcal\n", - " 24\n", + " 25\n", " \n", " \n", " 3\n", " socal\n", - " 82\n", + " 84\n", " \n", " \n", "\n", @@ -1274,13 +1667,13 @@ ], "text/plain": [ " region n\n", - "0 central 81\n", + "0 central 82\n", "1 mojave 6\n", - "2 norcal 24\n", 
- "3 socal 82" + "2 norcal 25\n", + "3 socal 84" ] }, - "execution_count": 41, + "execution_count": 104, "metadata": {}, "output_type": "execute_result" } @@ -1291,7 +1684,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 105, "id": "5fc9e426-e1ca-44fb-a0d3-107c145b0fec", "metadata": { "tags": [] @@ -1300,7 +1693,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4655f1e7dab84972a8a246c46f19dbab", + "model_id": "446941c220614e509a402c2e3e4fbbb4", "version_major": 2, "version_minor": 0 }, @@ -1314,12 +1707,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ddf87baaef154016bd72a7d7919737d5", + "model_id": "b6ef350cc757472fbc0074d17ccf96ac", "version_major": 2, "version_minor": 0 }, "text/plain": [ - " 0%| | 0/24 [00:00 Date: Mon, 6 Nov 2023 19:52:53 +0000 Subject: [PATCH 2/4] keep working on scripts --- conveyal_update/README.md | 3 +- conveyal_update/check_download_feeds.ipynb | 805 ++++++++++++++++----- conveyal_update/conveyal_vars.py | 1 + conveyal_update/evaluate_feeds.py | 69 ++ conveyal_update/match_feeds_regions.py | 38 + 5 files changed, 749 insertions(+), 167 deletions(-) create mode 100644 conveyal_update/evaluate_feeds.py create mode 100644 conveyal_update/match_feeds_regions.py diff --git a/conveyal_update/README.md b/conveyal_update/README.md index dd5f74b69..7fde19a63 100644 --- a/conveyal_update/README.md +++ b/conveyal_update/README.md @@ -11,5 +11,4 @@ * Set target date in `conveyal_vars.py`. Region boundaries are also set here, but these should remain static unless the decision is made to use entirely different regions in Conveyal. Target date should be a mid-week day. * TODO `evaluate_feeds.py` includes functions to check to see which feeds have service defined on the target date, and show feeds without any apparent service, including if that service is apparently captured in another feed. 
This helps check for potential coverage gaps, likely due to GTFS feed expirations and/or the [publishing future service issue](https://github.com/MobilityData/GTFS_Schedule_Best-Practices/issues/48(https://github.com/MobilityData/GTFS_Schedule_Best-Practices/issues/48). You may have to shift the target date around to find the best overall coverage, and/or manually edit important but missing feeds to define service if reasonable. * TODO `match_feeds_regions.py` -* TODO `download_data.py` -* TODO `run_and_handoff.py` runs the above scripts, and additionally generates handoff outputs that can be used to download, crop, and filter OSM data for each region using... \ No newline at end of file +* TODO `download_data.py`, and additionally generates handoff outputs that can be used to download, crop, and filter OSM data for each region using... \ No newline at end of file diff --git a/conveyal_update/check_download_feeds.ipynb b/conveyal_update/check_download_feeds.ipynb index 9d045a2c8..b15262b68 100644 --- a/conveyal_update/check_download_feeds.ipynb +++ b/conveyal_update/check_download_feeds.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "c66b1132-56b5-4b11-9318-b8f268ecab30", "metadata": {}, "outputs": [], @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 2, "id": "296bfae5-a830-4b35-a202-87f0c3143c83", "metadata": {}, "outputs": [], @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 3, "id": "f73da205-2eea-4acf-a649-c1f1e2441ac3", "metadata": {}, "outputs": [], @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 4, "id": "d2119c99-4e57-4896-909e-4fc0e7c420f7", "metadata": {}, "outputs": [ @@ -98,8 +98,8 @@ " base64_url\n", " gtfs_dataset_key\n", " gtfs_dataset_name\n", - " name\n", " regional_feed_type\n", + " name\n", " type\n", " \n", " \n", @@ -113,8 +113,8 @@ " 
aHR0cHM6Ly90Y3J0YS50cmlwc2hvdC5jb20vdjEvZ3Rmcy...\n", " 0139b1253130b33adcd4b3a4490530d2\n", " TCRTA TripShot Schedule\n", - " TCRTA TripShot Schedule\n", " None\n", + " TCRTA TripShot Schedule\n", " schedule\n", " \n", " \n", @@ -126,8 +126,8 @@ " aHR0cHM6Ly9hcHAubWVjYXRyYW4uY29tL3VyYi93cy9mZW...\n", " 014d0998350083249a9eb310635548c2\n", " SLO Schedule\n", - " SLO Schedule\n", " None\n", + " SLO Schedule\n", " schedule\n", " \n", " \n", @@ -139,8 +139,8 @@ " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...\n", " 015d67d5b75b5cf2b710bbadadfb75f5\n", " Bay Area 511 Marin Schedule\n", - " Bay Area 511 Marin Schedule\n", " Regional Subfeed\n", + " Bay Area 511 Marin Schedule\n", " schedule\n", " \n", " \n", @@ -168,13 +168,13 @@ "1 014d0998350083249a9eb310635548c2 SLO Schedule \n", "2 015d67d5b75b5cf2b710bbadadfb75f5 Bay Area 511 Marin Schedule \n", "\n", - " name regional_feed_type type \n", - "0 TCRTA TripShot Schedule None schedule \n", - "1 SLO Schedule None schedule \n", - "2 Bay Area 511 Marin Schedule Regional Subfeed schedule " + " regional_feed_type name type \n", + "0 None TCRTA TripShot Schedule schedule \n", + "1 None SLO Schedule schedule \n", + "2 Regional Subfeed Bay Area 511 Marin Schedule schedule " ] }, - "execution_count": 32, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -185,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 5, "id": "99a28abf-f771-4873-861c-e613a07d099e", "metadata": {}, "outputs": [], @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 6, "id": "53732ff0-4ba5-4b2b-bfb4-0d2f4a61de55", "metadata": {}, "outputs": [], @@ -212,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 7, "id": "b3de2066-875f-4729-a935-3dc4b0988e31", "metadata": {}, "outputs": [], @@ -222,19 +222,38 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 8, + "id": "5aabc27c-d744-4811-bd4f-0e524c994293", 
+ "metadata": {}, + "outputs": [], + "source": [ + "target_dt = dt.datetime.combine(target_date, dt.time(0))\n", + "\n", + "services = (tbls.mart_transit_database.dim_gtfs_service_data()\n", + " >> filter(_._valid_from <= target_dt, _._valid_to > target_dt)\n", + " # >> filter(_.gtfs_dataset_key == 'da7e9e09d3eec6c7686adc21c8b28b63') # test with BCT\n", + " # >> filter(_.service_key == '5bc7371dca26d74a99be945b18b3174e')\n", + " >> select(_.service_key, _.gtfs_dataset_key)\n", + " >> collect()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "id": "82b1cd8d-eae8-4014-be31-6830e83f0734", "metadata": {}, "outputs": [], "source": [ "feeds_on_target = (feeds_on_target >> left_join(_, service_defined, on = 'feed_key')\n", " >> select(-_.name)\n", + " >> left_join(_, services, on='gtfs_dataset_key')\n", " )" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 10, "id": "f8434c0f-4699-422f-95ae-27d6b6e2dffd", "metadata": {}, "outputs": [ @@ -269,11 +288,12 @@ " regional_feed_type\n", " type\n", " n\n", + " service_key\n", " \n", " \n", " \n", " \n", - " 53\n", + " 62\n", " c927bb3d92c13a63a7900caa77f4bee0\n", " 2023-10-18\n", " a9a4672431e928089176517c3297db66\n", @@ -284,9 +304,10 @@ " None\n", " schedule\n", " NaN\n", + " f157fa35c6207fceeb9d883a62caa016\n", " \n", " \n", - " 55\n", + " 64\n", " fd77340aba25ef0767ecebcba3f0d0de\n", " 2023-10-18\n", " f561f3f554f4ef3f22121116be67b2f0\n", @@ -297,9 +318,10 @@ " None\n", " schedule\n", " NaN\n", + " 570f0f680cba9575db030462cbba50d1\n", " \n", " \n", - " 91\n", + " 102\n", " bdd83751c48788bf23a2a9c571bd0d5e\n", " 2023-10-18\n", " 54c22d251df6fdaf2b3c1d699f4a739b\n", @@ -310,9 +332,10 @@ " None\n", " schedule\n", " NaN\n", + " 7535ce68155dde3462a1248edc9919e4\n", " \n", " \n", - " 99\n", + " 111\n", " a1647ad5f590c79d0b5f185b09856064\n", " 2023-10-18\n", " e4af9a8cc80c88b868f066824d992d9f\n", @@ -323,9 +346,10 @@ " None\n", " schedule\n", " NaN\n", + " 
98322f54467a89178241a42a03c93c66\n", " \n", " \n", - " 101\n", + " 113\n", " fcb8bd67a2f3bfdac5d819feb5b14f04\n", " 2023-10-18\n", " 07feb14721d9fe332a8e7fb37bf625dd\n", @@ -336,9 +360,10 @@ " None\n", " schedule\n", " NaN\n", + " 3e5d9847cfc0bcefbe11f601ad358690\n", " \n", " \n", - " 103\n", + " 115\n", " f4e59cc5e52efaa8fad075e7aa9d693e\n", " 2023-10-18\n", " a43c08f7ed73ee9e88204a260933b461\n", @@ -349,9 +374,10 @@ " None\n", " schedule\n", " NaN\n", + " 242716bc3da3ddd600eeba2cd310fa1c\n", " \n", " \n", - " 109\n", + " 121\n", " 2353fa44db8fac09e9b2a994eb579953\n", " 2023-10-18\n", " 6cf65a84397884d0ffb44db5b8a08bd4\n", @@ -362,9 +388,10 @@ " None\n", " schedule\n", " NaN\n", + " c4ae3c5830ed2661c7b76bb8c946fc72\n", " \n", " \n", - " 122\n", + " 134\n", " 02ddc9b2168f50ae0bdce2fa101ce958\n", " 2023-10-18\n", " e68cd2ffe7bb1760d28f3a94e3a31dac\n", @@ -375,9 +402,10 @@ " None\n", " schedule\n", " NaN\n", + " 2de8641db93b73d3c99194f73c097d37\n", " \n", " \n", - " 139\n", + " 154\n", " b9b6949a4dd7548ea9733d62099fd0fa\n", " 2023-10-18\n", " 696beb3cb2e375f8524ae18eff0d041d\n", @@ -388,9 +416,10 @@ " None\n", " schedule\n", " NaN\n", + " e6ef2ac8d134a1ba042ce6df1a4b1efd\n", " \n", " \n", - " 154\n", + " 175\n", " 2a6176818feb911d0d444d7268594cc3\n", " 2023-10-18\n", " 062563b11ac99ddec6d3bec6f613b78d\n", @@ -401,9 +430,10 @@ " None\n", " schedule\n", " NaN\n", + " a3dca9bf042f372998a3251c3f16053c\n", " \n", " \n", - " 162\n", + " 185\n", " 94e92e15f61f2e04ba18e4d54e98bb9b\n", " 2023-10-18\n", " 1b77ef49f5bc70038cbf15e4f5f98477\n", @@ -414,9 +444,10 @@ " None\n", " schedule\n", " NaN\n", + " 968c9d086e00b106991c299dc47b5c12\n", " \n", " \n", - " 163\n", + " 186\n", " 6c486bf9f5ff3c0fbceb3c091f8ae009\n", " 2023-10-18\n", " fe662c95bddfb6e5fd75cb0afbb85cd8\n", @@ -427,9 +458,10 @@ " None\n", " schedule\n", " NaN\n", + " d5969d0cb382b296935540c720eedc6a\n", " \n", " \n", - " 171\n", + " 194\n", " 37f0266a8efb77b896d7dc17b0d15483\n", " 2023-10-18\n", " 
e055f64de6bf34b0d31d67b6d6e31dfb\n", @@ -440,9 +472,10 @@ " None\n", " schedule\n", " NaN\n", + " a5659c824eeba107b24ffdcbeb005d15\n", " \n", " \n", - " 172\n", + " 195\n", " cac869872a0a2cc693f3fe371dbef064\n", " 2023-10-18\n", " d95f2f26bbf4846e4eb84d352fb0990d\n", @@ -453,9 +486,10 @@ " None\n", " schedule\n", " NaN\n", + " 5bc7371dca26d74a99be945b18b3174e\n", " \n", " \n", - " 191\n", + " 215\n", " 98a8b1b5e7cb731c206dfc8eb105b4d7\n", " 2023-10-18\n", " d5b1960f462b2798ad3d2bbd7a77a01e\n", @@ -466,9 +500,10 @@ " None\n", " schedule\n", " NaN\n", + " db2ef944fe6d4024e62c6cf638091fce\n", " \n", " \n", - " 200\n", + " 226\n", " 52f018f92d2bc0d86e60b4e3fa5cea48\n", " 2023-10-18\n", " d1fd5c603d131faf826423a8f00aa7ac\n", @@ -479,6 +514,7 @@ " None\n", " schedule\n", " NaN\n", + " 525d6d12e42890a9df6d7c95db907b0d\n", " \n", " \n", "\n", @@ -486,97 +522,97 @@ ], "text/plain": [ " key date \\\n", - "53 c927bb3d92c13a63a7900caa77f4bee0 2023-10-18 \n", - "55 fd77340aba25ef0767ecebcba3f0d0de 2023-10-18 \n", - "91 bdd83751c48788bf23a2a9c571bd0d5e 2023-10-18 \n", - "99 a1647ad5f590c79d0b5f185b09856064 2023-10-18 \n", - "101 fcb8bd67a2f3bfdac5d819feb5b14f04 2023-10-18 \n", - "103 f4e59cc5e52efaa8fad075e7aa9d693e 2023-10-18 \n", - "109 2353fa44db8fac09e9b2a994eb579953 2023-10-18 \n", - "122 02ddc9b2168f50ae0bdce2fa101ce958 2023-10-18 \n", - "139 b9b6949a4dd7548ea9733d62099fd0fa 2023-10-18 \n", - "154 2a6176818feb911d0d444d7268594cc3 2023-10-18 \n", - "162 94e92e15f61f2e04ba18e4d54e98bb9b 2023-10-18 \n", - "163 6c486bf9f5ff3c0fbceb3c091f8ae009 2023-10-18 \n", - "171 37f0266a8efb77b896d7dc17b0d15483 2023-10-18 \n", - "172 cac869872a0a2cc693f3fe371dbef064 2023-10-18 \n", - "191 98a8b1b5e7cb731c206dfc8eb105b4d7 2023-10-18 \n", - "200 52f018f92d2bc0d86e60b4e3fa5cea48 2023-10-18 \n", + "62 c927bb3d92c13a63a7900caa77f4bee0 2023-10-18 \n", + "64 fd77340aba25ef0767ecebcba3f0d0de 2023-10-18 \n", + "102 bdd83751c48788bf23a2a9c571bd0d5e 2023-10-18 \n", + "111 
a1647ad5f590c79d0b5f185b09856064 2023-10-18 \n", + "113 fcb8bd67a2f3bfdac5d819feb5b14f04 2023-10-18 \n", + "115 f4e59cc5e52efaa8fad075e7aa9d693e 2023-10-18 \n", + "121 2353fa44db8fac09e9b2a994eb579953 2023-10-18 \n", + "134 02ddc9b2168f50ae0bdce2fa101ce958 2023-10-18 \n", + "154 b9b6949a4dd7548ea9733d62099fd0fa 2023-10-18 \n", + "175 2a6176818feb911d0d444d7268594cc3 2023-10-18 \n", + "185 94e92e15f61f2e04ba18e4d54e98bb9b 2023-10-18 \n", + "186 6c486bf9f5ff3c0fbceb3c091f8ae009 2023-10-18 \n", + "194 37f0266a8efb77b896d7dc17b0d15483 2023-10-18 \n", + "195 cac869872a0a2cc693f3fe371dbef064 2023-10-18 \n", + "215 98a8b1b5e7cb731c206dfc8eb105b4d7 2023-10-18 \n", + "226 52f018f92d2bc0d86e60b4e3fa5cea48 2023-10-18 \n", "\n", " feed_key feed_timezone \\\n", - "53 a9a4672431e928089176517c3297db66 America/Los_Angeles \n", - "55 f561f3f554f4ef3f22121116be67b2f0 US/Pacific \n", - "91 54c22d251df6fdaf2b3c1d699f4a739b America/Los_Angeles \n", - "99 e4af9a8cc80c88b868f066824d992d9f America/Los_Angeles \n", - "101 07feb14721d9fe332a8e7fb37bf625dd America/New_York \n", - "103 a43c08f7ed73ee9e88204a260933b461 America/Los_Angeles \n", - "109 6cf65a84397884d0ffb44db5b8a08bd4 America/Los_Angeles \n", - "122 e68cd2ffe7bb1760d28f3a94e3a31dac America/Los_Angeles \n", - "139 696beb3cb2e375f8524ae18eff0d041d America/Los_Angeles \n", - "154 062563b11ac99ddec6d3bec6f613b78d America/Los_Angeles \n", - "162 1b77ef49f5bc70038cbf15e4f5f98477 America/Los_Angeles \n", - "163 fe662c95bddfb6e5fd75cb0afbb85cd8 America/Los_Angeles \n", - "171 e055f64de6bf34b0d31d67b6d6e31dfb America/Los_Angeles \n", - "172 d95f2f26bbf4846e4eb84d352fb0990d America/Los_Angeles \n", - "191 d5b1960f462b2798ad3d2bbd7a77a01e America/Los_Angeles \n", - "200 d1fd5c603d131faf826423a8f00aa7ac America/Los_Angeles \n", + "62 a9a4672431e928089176517c3297db66 America/Los_Angeles \n", + "64 f561f3f554f4ef3f22121116be67b2f0 US/Pacific \n", + "102 54c22d251df6fdaf2b3c1d699f4a739b America/Los_Angeles \n", + "111 
e4af9a8cc80c88b868f066824d992d9f America/Los_Angeles \n", + "113 07feb14721d9fe332a8e7fb37bf625dd America/New_York \n", + "115 a43c08f7ed73ee9e88204a260933b461 America/Los_Angeles \n", + "121 6cf65a84397884d0ffb44db5b8a08bd4 America/Los_Angeles \n", + "134 e68cd2ffe7bb1760d28f3a94e3a31dac America/Los_Angeles \n", + "154 696beb3cb2e375f8524ae18eff0d041d America/Los_Angeles \n", + "175 062563b11ac99ddec6d3bec6f613b78d America/Los_Angeles \n", + "185 1b77ef49f5bc70038cbf15e4f5f98477 America/Los_Angeles \n", + "186 fe662c95bddfb6e5fd75cb0afbb85cd8 America/Los_Angeles \n", + "194 e055f64de6bf34b0d31d67b6d6e31dfb America/Los_Angeles \n", + "195 d95f2f26bbf4846e4eb84d352fb0990d America/Los_Angeles \n", + "215 d5b1960f462b2798ad3d2bbd7a77a01e America/Los_Angeles \n", + "226 d1fd5c603d131faf826423a8f00aa7ac America/Los_Angeles \n", "\n", " base64_url \\\n", - "53 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "55 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "91 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "99 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "101 aHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw \n", - "103 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", - "109 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "122 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "139 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "154 aHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym... \n", - "162 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "163 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "171 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "172 aHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW... \n", - "191 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "200 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", + "62 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "64 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... 
\n", + "102 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "111 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "113 aHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw \n", + "115 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", + "121 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "134 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "154 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "175 aHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym... \n", + "185 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "186 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "194 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "195 aHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW... \n", + "215 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "226 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", "\n", " gtfs_dataset_key gtfs_dataset_name \\\n", - "53 4a9e6a8b8db445bc9fc3cf398ded67b1 Glendora Schedule \n", - "55 4b84ffbdc2b6abb171a5df6ce8f06797 Wasco DAR Schedule \n", - "91 73e1cb24210dab4feb13fbf2924939d2 Maywood Schedule \n", - "99 82a0ba5e020f951f6e780761537ef12b Stanislaus Flex \n", - "101 85e9d75e8430f242e9f5600d2f5c6964 G Trans Schedule \n", - "103 88780135c261b5b1391afdd5d562e5ba Tracy Schedule \n", - "109 8e3f49cfd30b44746a0724ca60e596ca Inglewood Schedule \n", - "122 9ca0bbfd2ada3d686f1c3a136c21eafc San Juan Capistrano Schedule \n", - "139 b62b4267caca504e1057c52a91611c16 Eastern Sierra Flex \n", - "154 c599bcb93f9c039473932479eb082d7d Morro Bay Schedule \n", - "162 cd08875e7d95ed218f98b7694ac8ea3f Compton Schedule \n", - "163 cde2b7a63ab7bb33141c8b02b001ea0f TCAT Flex \n", - "171 da71c7121e987c3cd333d48ee24e23ef Redwood Coast Flex \n", - "172 da7e9e09d3eec6c7686adc21c8b28b63 Beach Cities Schedule \n", - "191 f61fbdf46f6ea735259b9d0c48139ebe Artesia Schedule \n", - "200 ff51495a24cf286ff2f7bc9b3401f855 Blossom Express Schedule \n", + "62 4a9e6a8b8db445bc9fc3cf398ded67b1 Glendora Schedule \n", + "64 
4b84ffbdc2b6abb171a5df6ce8f06797 Wasco DAR Schedule \n", + "102 73e1cb24210dab4feb13fbf2924939d2 Maywood Schedule \n", + "111 82a0ba5e020f951f6e780761537ef12b Stanislaus Flex \n", + "113 85e9d75e8430f242e9f5600d2f5c6964 G Trans Schedule \n", + "115 88780135c261b5b1391afdd5d562e5ba Tracy Schedule \n", + "121 8e3f49cfd30b44746a0724ca60e596ca Inglewood Schedule \n", + "134 9ca0bbfd2ada3d686f1c3a136c21eafc San Juan Capistrano Schedule \n", + "154 b62b4267caca504e1057c52a91611c16 Eastern Sierra Flex \n", + "175 c599bcb93f9c039473932479eb082d7d Morro Bay Schedule \n", + "185 cd08875e7d95ed218f98b7694ac8ea3f Compton Schedule \n", + "186 cde2b7a63ab7bb33141c8b02b001ea0f TCAT Flex \n", + "194 da71c7121e987c3cd333d48ee24e23ef Redwood Coast Flex \n", + "195 da7e9e09d3eec6c7686adc21c8b28b63 Beach Cities Schedule \n", + "215 f61fbdf46f6ea735259b9d0c48139ebe Artesia Schedule \n", + "226 ff51495a24cf286ff2f7bc9b3401f855 Blossom Express Schedule \n", "\n", - " regional_feed_type type n \n", - "53 None schedule NaN \n", - "55 None schedule NaN \n", - "91 None schedule NaN \n", - "99 None schedule NaN \n", - "101 None schedule NaN \n", - "103 None schedule NaN \n", - "109 None schedule NaN \n", - "122 None schedule NaN \n", - "139 None schedule NaN \n", - "154 None schedule NaN \n", - "162 None schedule NaN \n", - "163 None schedule NaN \n", - "171 None schedule NaN \n", - "172 None schedule NaN \n", - "191 None schedule NaN \n", - "200 None schedule NaN " + " regional_feed_type type n service_key \n", + "62 None schedule NaN f157fa35c6207fceeb9d883a62caa016 \n", + "64 None schedule NaN 570f0f680cba9575db030462cbba50d1 \n", + "102 None schedule NaN 7535ce68155dde3462a1248edc9919e4 \n", + "111 None schedule NaN 98322f54467a89178241a42a03c93c66 \n", + "113 None schedule NaN 3e5d9847cfc0bcefbe11f601ad358690 \n", + "115 None schedule NaN 242716bc3da3ddd600eeba2cd310fa1c \n", + "121 None schedule NaN c4ae3c5830ed2661c7b76bb8c946fc72 \n", + "134 None schedule NaN 
2de8641db93b73d3c99194f73c097d37 \n", + "154 None schedule NaN e6ef2ac8d134a1ba042ce6df1a4b1efd \n", + "175 None schedule NaN a3dca9bf042f372998a3251c3f16053c \n", + "185 None schedule NaN 968c9d086e00b106991c299dc47b5c12 \n", + "186 None schedule NaN d5969d0cb382b296935540c720eedc6a \n", + "194 None schedule NaN a5659c824eeba107b24ffdcbeb005d15 \n", + "195 None schedule NaN 5bc7371dca26d74a99be945b18b3174e \n", + "215 None schedule NaN db2ef944fe6d4024e62c6cf638091fce \n", + "226 None schedule NaN 525d6d12e42890a9df6d7c95db907b0d " ] }, - "execution_count": 37, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -590,17 +626,465 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 14, + "id": "2b81663d-1513-430f-9471-092801753c56", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "((feeds_on_target >> filter(-_.n.isna())).service_key == '5bc7371dca26d74a99be945b18b3174e').any()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "62c97603-a1da-48eb-8515-ea0c5c8346df", + "metadata": {}, + "outputs": [], + "source": [ + "def check_defined_elsewhere(row, df):\n", + " '''\n", + " for feeds without service defined, check if the same service is captured in another feed that does include service\n", + " '''\n", + " is_defined = ((df >> filter(-_.n.isna())).service_key == row.service_key).any()\n", + " row['service_any_feed'] = is_defined\n", + " return row" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "fd5991cf-651d-4eaa-880d-9b4a6fe81ad2", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keydatefeed_keyfeed_timezonebase64_urlgtfs_dataset_keygtfs_dataset_nameregional_feed_typetypenservice_keyservice_any_feed
62c927bb3d92c13a63a7900caa77f4bee02023-10-18a9a4672431e928089176517c3297db66America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...4a9e6a8b8db445bc9fc3cf398ded67b1Glendora ScheduleNonescheduleNaNf157fa35c6207fceeb9d883a62caa016False
64fd77340aba25ef0767ecebcba3f0d0de2023-10-18f561f3f554f4ef3f22121116be67b2f0US/PacificaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...4b84ffbdc2b6abb171a5df6ce8f06797Wasco DAR ScheduleNonescheduleNaN570f0f680cba9575db030462cbba50d1False
102bdd83751c48788bf23a2a9c571bd0d5e2023-10-1854c22d251df6fdaf2b3c1d699f4a739bAmerica/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...73e1cb24210dab4feb13fbf2924939d2Maywood ScheduleNonescheduleNaN7535ce68155dde3462a1248edc9919e4False
111a1647ad5f590c79d0b5f185b098560642023-10-18e4af9a8cc80c88b868f066824d992d9fAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...82a0ba5e020f951f6e780761537ef12bStanislaus FlexNonescheduleNaN98322f54467a89178241a42a03c93c66False
113fcb8bd67a2f3bfdac5d819feb5b14f042023-10-1807feb14721d9fe332a8e7fb37bf625ddAmerica/New_YorkaHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw85e9d75e8430f242e9f5600d2f5c6964G Trans ScheduleNonescheduleNaN3e5d9847cfc0bcefbe11f601ad358690False
115f4e59cc5e52efaa8fad075e7aa9d693e2023-10-18a43c08f7ed73ee9e88204a260933b461America/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...88780135c261b5b1391afdd5d562e5baTracy ScheduleNonescheduleNaN242716bc3da3ddd600eeba2cd310fa1cFalse
1212353fa44db8fac09e9b2a994eb5799532023-10-186cf65a84397884d0ffb44db5b8a08bd4America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...8e3f49cfd30b44746a0724ca60e596caInglewood ScheduleNonescheduleNaNc4ae3c5830ed2661c7b76bb8c946fc72False
13402ddc9b2168f50ae0bdce2fa101ce9582023-10-18e68cd2ffe7bb1760d28f3a94e3a31dacAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...9ca0bbfd2ada3d686f1c3a136c21eafcSan Juan Capistrano ScheduleNonescheduleNaN2de8641db93b73d3c99194f73c097d37False
154b9b6949a4dd7548ea9733d62099fd0fa2023-10-18696beb3cb2e375f8524ae18eff0d041dAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...b62b4267caca504e1057c52a91611c16Eastern Sierra FlexNonescheduleNaNe6ef2ac8d134a1ba042ce6df1a4b1efdFalse
1752a6176818feb911d0d444d7268594cc32023-10-18062563b11ac99ddec6d3bec6f613b78dAmerica/Los_AngelesaHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym...c599bcb93f9c039473932479eb082d7dMorro Bay ScheduleNonescheduleNaNa3dca9bf042f372998a3251c3f16053cFalse
18594e92e15f61f2e04ba18e4d54e98bb9b2023-10-181b77ef49f5bc70038cbf15e4f5f98477America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...cd08875e7d95ed218f98b7694ac8ea3fCompton ScheduleNonescheduleNaN968c9d086e00b106991c299dc47b5c12False
1866c486bf9f5ff3c0fbceb3c091f8ae0092023-10-18fe662c95bddfb6e5fd75cb0afbb85cd8America/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...cde2b7a63ab7bb33141c8b02b001ea0fTCAT FlexNonescheduleNaNd5969d0cb382b296935540c720eedc6aFalse
19437f0266a8efb77b896d7dc17b0d154832023-10-18e055f64de6bf34b0d31d67b6d6e31dfbAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...da71c7121e987c3cd333d48ee24e23efRedwood Coast FlexNonescheduleNaNa5659c824eeba107b24ffdcbeb005d15False
195cac869872a0a2cc693f3fe371dbef0642023-10-18d95f2f26bbf4846e4eb84d352fb0990dAmerica/Los_AngelesaHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW...da7e9e09d3eec6c7686adc21c8b28b63Beach Cities ScheduleNonescheduleNaN5bc7371dca26d74a99be945b18b3174eTrue
21598a8b1b5e7cb731c206dfc8eb105b4d72023-10-18d5b1960f462b2798ad3d2bbd7a77a01eAmerica/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...f61fbdf46f6ea735259b9d0c48139ebeArtesia ScheduleNonescheduleNaNdb2ef944fe6d4024e62c6cf638091fceFalse
22652f018f92d2bc0d86e60b4e3fa5cea482023-10-18d1fd5c603d131faf826423a8f00aa7acAmerica/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...ff51495a24cf286ff2f7bc9b3401f855Blossom Express ScheduleNonescheduleNaN525d6d12e42890a9df6d7c95db907b0dFalse
\n", + "
" + ], + "text/plain": [ + " key date \\\n", + "62 c927bb3d92c13a63a7900caa77f4bee0 2023-10-18 \n", + "64 fd77340aba25ef0767ecebcba3f0d0de 2023-10-18 \n", + "102 bdd83751c48788bf23a2a9c571bd0d5e 2023-10-18 \n", + "111 a1647ad5f590c79d0b5f185b09856064 2023-10-18 \n", + "113 fcb8bd67a2f3bfdac5d819feb5b14f04 2023-10-18 \n", + "115 f4e59cc5e52efaa8fad075e7aa9d693e 2023-10-18 \n", + "121 2353fa44db8fac09e9b2a994eb579953 2023-10-18 \n", + "134 02ddc9b2168f50ae0bdce2fa101ce958 2023-10-18 \n", + "154 b9b6949a4dd7548ea9733d62099fd0fa 2023-10-18 \n", + "175 2a6176818feb911d0d444d7268594cc3 2023-10-18 \n", + "185 94e92e15f61f2e04ba18e4d54e98bb9b 2023-10-18 \n", + "186 6c486bf9f5ff3c0fbceb3c091f8ae009 2023-10-18 \n", + "194 37f0266a8efb77b896d7dc17b0d15483 2023-10-18 \n", + "195 cac869872a0a2cc693f3fe371dbef064 2023-10-18 \n", + "215 98a8b1b5e7cb731c206dfc8eb105b4d7 2023-10-18 \n", + "226 52f018f92d2bc0d86e60b4e3fa5cea48 2023-10-18 \n", + "\n", + " feed_key feed_timezone \\\n", + "62 a9a4672431e928089176517c3297db66 America/Los_Angeles \n", + "64 f561f3f554f4ef3f22121116be67b2f0 US/Pacific \n", + "102 54c22d251df6fdaf2b3c1d699f4a739b America/Los_Angeles \n", + "111 e4af9a8cc80c88b868f066824d992d9f America/Los_Angeles \n", + "113 07feb14721d9fe332a8e7fb37bf625dd America/New_York \n", + "115 a43c08f7ed73ee9e88204a260933b461 America/Los_Angeles \n", + "121 6cf65a84397884d0ffb44db5b8a08bd4 America/Los_Angeles \n", + "134 e68cd2ffe7bb1760d28f3a94e3a31dac America/Los_Angeles \n", + "154 696beb3cb2e375f8524ae18eff0d041d America/Los_Angeles \n", + "175 062563b11ac99ddec6d3bec6f613b78d America/Los_Angeles \n", + "185 1b77ef49f5bc70038cbf15e4f5f98477 America/Los_Angeles \n", + "186 fe662c95bddfb6e5fd75cb0afbb85cd8 America/Los_Angeles \n", + "194 e055f64de6bf34b0d31d67b6d6e31dfb America/Los_Angeles \n", + "195 d95f2f26bbf4846e4eb84d352fb0990d America/Los_Angeles \n", + "215 d5b1960f462b2798ad3d2bbd7a77a01e America/Los_Angeles \n", + "226 d1fd5c603d131faf826423a8f00aa7ac 
America/Los_Angeles \n", + "\n", + " base64_url \\\n", + "62 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "64 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "102 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "111 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "113 aHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw \n", + "115 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", + "121 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "134 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "154 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "175 aHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym... \n", + "185 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "186 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "194 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", + "195 aHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW... \n", + "215 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", + "226 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... 
\n", + "\n", + " gtfs_dataset_key gtfs_dataset_name \\\n", + "62 4a9e6a8b8db445bc9fc3cf398ded67b1 Glendora Schedule \n", + "64 4b84ffbdc2b6abb171a5df6ce8f06797 Wasco DAR Schedule \n", + "102 73e1cb24210dab4feb13fbf2924939d2 Maywood Schedule \n", + "111 82a0ba5e020f951f6e780761537ef12b Stanislaus Flex \n", + "113 85e9d75e8430f242e9f5600d2f5c6964 G Trans Schedule \n", + "115 88780135c261b5b1391afdd5d562e5ba Tracy Schedule \n", + "121 8e3f49cfd30b44746a0724ca60e596ca Inglewood Schedule \n", + "134 9ca0bbfd2ada3d686f1c3a136c21eafc San Juan Capistrano Schedule \n", + "154 b62b4267caca504e1057c52a91611c16 Eastern Sierra Flex \n", + "175 c599bcb93f9c039473932479eb082d7d Morro Bay Schedule \n", + "185 cd08875e7d95ed218f98b7694ac8ea3f Compton Schedule \n", + "186 cde2b7a63ab7bb33141c8b02b001ea0f TCAT Flex \n", + "194 da71c7121e987c3cd333d48ee24e23ef Redwood Coast Flex \n", + "195 da7e9e09d3eec6c7686adc21c8b28b63 Beach Cities Schedule \n", + "215 f61fbdf46f6ea735259b9d0c48139ebe Artesia Schedule \n", + "226 ff51495a24cf286ff2f7bc9b3401f855 Blossom Express Schedule \n", + "\n", + " regional_feed_type type n service_key \\\n", + "62 None schedule NaN f157fa35c6207fceeb9d883a62caa016 \n", + "64 None schedule NaN 570f0f680cba9575db030462cbba50d1 \n", + "102 None schedule NaN 7535ce68155dde3462a1248edc9919e4 \n", + "111 None schedule NaN 98322f54467a89178241a42a03c93c66 \n", + "113 None schedule NaN 3e5d9847cfc0bcefbe11f601ad358690 \n", + "115 None schedule NaN 242716bc3da3ddd600eeba2cd310fa1c \n", + "121 None schedule NaN c4ae3c5830ed2661c7b76bb8c946fc72 \n", + "134 None schedule NaN 2de8641db93b73d3c99194f73c097d37 \n", + "154 None schedule NaN e6ef2ac8d134a1ba042ce6df1a4b1efd \n", + "175 None schedule NaN a3dca9bf042f372998a3251c3f16053c \n", + "185 None schedule NaN 968c9d086e00b106991c299dc47b5c12 \n", + "186 None schedule NaN d5969d0cb382b296935540c720eedc6a \n", + "194 None schedule NaN a5659c824eeba107b24ffdcbeb005d15 \n", + "195 None schedule NaN 
5bc7371dca26d74a99be945b18b3174e \n", + "215 None schedule NaN db2ef944fe6d4024e62c6cf638091fce \n", + "226 None schedule NaN 525d6d12e42890a9df6d7c95db907b0d \n", + "\n", + " service_any_feed \n", + "62 False \n", + "64 False \n", + "102 False \n", + "111 False \n", + "113 False \n", + "115 False \n", + "121 False \n", + "134 False \n", + "154 False \n", + "175 False \n", + "185 False \n", + "186 False \n", + "194 False \n", + "195 True \n", + "215 False \n", + "226 False " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feeds_on_target.apply(check_defined_elsewhere, axis=1, args=[feeds_on_target]) >> filter(_.n.isna())" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "id": "467b6dbb-6c7c-4a8e-8a34-e5d2f478524a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(201, 11)" + "(227, 11)" ] }, - "execution_count": 28, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -621,7 +1105,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 22, "id": "ae9afaa4-64da-4b62-a22d-d60671278761", "metadata": {}, "outputs": [], @@ -631,7 +1115,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 23, "id": "1f021355-1f40-4121-8a6d-32eeb4a1c52e", "metadata": {}, "outputs": [], @@ -641,7 +1125,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 24, "id": "87642e47-80eb-4a21-81c4-e41f467a7c8d", "metadata": {}, "outputs": [], @@ -653,7 +1137,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 25, "id": "328b7a62-e5b7-47b8-b0e2-21f971b8e9e0", "metadata": {}, "outputs": [], @@ -663,7 +1147,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 26, "id": "5b8d6fd6-6847-4121-a6c8-212a7d01f5b7", "metadata": {}, "outputs": [], @@ -673,7 +1157,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 27, "id": 
"4a3502e5-7240-4bb1-bf33-28c96c316c3a", "metadata": {}, "outputs": [ @@ -761,7 +1245,7 @@ "3 [-118.38043, 34.89945, -114.59015, 37.81629] " ] }, - "execution_count": 16, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -772,7 +1256,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 28, "id": "97d117b1-8835-473a-893b-ead256f4fba5", "metadata": {}, "outputs": [], @@ -783,7 +1267,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 29, "id": "6b9402e0-24de-43c4-b04e-0c903a4657d6", "metadata": {}, "outputs": [], @@ -793,20 +1277,20 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 30, "id": "01160e41-81f6-45b8-ae76-86041b8f478f", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" ], "text/plain": [ - "" + "" ] }, - "execution_count": 19, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -817,7 +1301,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 31, "id": "98644c75-cd9c-4032-8406-2f4a14a6edfd", "metadata": {}, "outputs": [ @@ -836,7 +1320,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 32, "id": "64a57fbd-26da-47d0-b549-254961afd70d", "metadata": {}, "outputs": [], @@ -846,7 +1330,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 33, "id": "d1f999fc-9295-4478-85cf-d5cc6e0b5956", "metadata": {}, "outputs": [ @@ -856,7 +1340,7 @@ "(88593, 5)" ] }, - "execution_count": 73, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -867,7 +1351,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 34, "id": "9a15d156-683f-4f15-8638-67054462baa0", "metadata": {}, "outputs": [], @@ -877,7 +1361,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 35, "id": "306c2065-0c23-43a5-aa47-c2950615cccf", "metadata": {}, "outputs": [], @@ -889,7 +1373,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 36, "id": "02847606-a347-4f48-827a-eca90ae45207", "metadata": {}, "outputs": [ @@ -919,7 +1403,6 @@ " south\n", " east\n", " west\n", - " bbox\n", " geometry\n", " index_right\n", " key\n", @@ -936,9 +1419,8 @@ " 39.07038\n", " -119.60541\n", " -124.49158\n", - " [-124.49158, 39.07038, -119.60541, 42.03909]\n", " POLYGON ((34109.459 117232.679, 32737.676 4468...\n", - " 52148\n", + " 78711\n", " 64fa4388fc2492d469f6049e40c14f20\n", " 7bd3d8c32eda4869c4d7f8bf2aec5bb0\n", " db86bc0b-d3af-4163-a30a-6ece57770dbe\n", @@ -951,9 +1433,8 @@ " 35.87347\n", " -117.53174\n", " -123.83789\n", - " [-123.83789, 35.87347, -117.53174, 39.64165]\n", " POLYGON ((222589.608 -235276.505, 211687.216 1...\n", - " 52148\n", + " 78711\n", " 64fa4388fc2492d469f6049e40c14f20\n", " 
7bd3d8c32eda4869c4d7f8bf2aec5bb0\n", " db86bc0b-d3af-4163-a30a-6ece57770dbe\n", @@ -966,9 +1447,8 @@ " 39.07038\n", " -119.60541\n", " -124.49158\n", - " [-124.49158, 39.07038, -119.60541, 42.03909]\n", " POLYGON ((34109.459 117232.679, 32737.676 4468...\n", - " 29210\n", + " 77704\n", " fabfb6cc39802b79ce2bb67e4fa3cd45\n", " 102b5149d072a697aeda213b8b72811b\n", " 4001\n", @@ -984,15 +1464,10 @@ "1 central 39.64165 35.87347 -117.53174 -123.83789 \n", "0 norcal 42.03909 39.07038 -119.60541 -124.49158 \n", "\n", - " bbox \\\n", - "0 [-124.49158, 39.07038, -119.60541, 42.03909] \n", - "1 [-123.83789, 35.87347, -117.53174, 39.64165] \n", - "0 [-124.49158, 39.07038, -119.60541, 42.03909] \n", - "\n", " geometry index_right \\\n", - "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 52148 \n", - "1 POLYGON ((222589.608 -235276.505, 211687.216 1... 52148 \n", - "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 29210 \n", + "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 78711 \n", + "1 POLYGON ((222589.608 -235276.505, 211687.216 1... 78711 \n", + "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 
77704 \n", "\n", " key feed_key \\\n", "0 64fa4388fc2492d469f6049e40c14f20 7bd3d8c32eda4869c4d7f8bf2aec5bb0 \n", @@ -1005,7 +1480,7 @@ "0 4001 Colfax Depot " ] }, - "execution_count": 76, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1016,7 +1491,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 37, "id": "40afc409-7952-4a92-8f6c-0e4757d14b61", "metadata": {}, "outputs": [], @@ -1026,7 +1501,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 38, "id": "85306aab-a9cf-4141-aa4b-5a4aa937e6ec", "metadata": {}, "outputs": [ @@ -1036,7 +1511,7 @@ "(197, 2)" ] }, - "execution_count": 78, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1047,7 +1522,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 39, "id": "83802555-c94e-4c19-a842-6c0e5d033790", "metadata": {}, "outputs": [], @@ -1058,7 +1533,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 40, "id": "457f1e27-5c02-4a29-a524-fbc62984bf27", "metadata": {}, "outputs": [ @@ -1101,7 +1576,7 @@ " \n", " \n", " 1\n", - " central\n", + " norcal\n", " 7bd3d8c32eda4869c4d7f8bf2aec5bb0\n", " Flixbus Schedule\n", " aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm...\n", @@ -1109,7 +1584,7 @@ " \n", " \n", " 2\n", - " socal\n", + " central\n", " 7bd3d8c32eda4869c4d7f8bf2aec5bb0\n", " Flixbus Schedule\n", " aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm...\n", @@ -1122,8 +1597,8 @@ "text/plain": [ " region feed_key gtfs_dataset_name \\\n", "0 norcal 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", - "1 central 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", - "2 socal 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", + "1 norcal 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", + "2 central 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", "\n", " base64_url date \n", "0 aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm... 
2023-10-18 \n", @@ -1131,7 +1606,7 @@ "2 aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm... 2023-10-18 " ] }, - "execution_count": 80, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } diff --git a/conveyal_update/conveyal_vars.py b/conveyal_update/conveyal_vars.py index 9e9da7042..0b99f238b 100644 --- a/conveyal_update/conveyal_vars.py +++ b/conveyal_update/conveyal_vars.py @@ -1,5 +1,6 @@ import datetime as dt +gcs_path = 'gs://calitp-analytics-data/data-analyses/conveyal_update/' target_date = dt.date(2023, 10, 18) conveyal_regions = {} diff --git a/conveyal_update/evaluate_feeds.py b/conveyal_update/evaluate_feeds.py new file mode 100644 index 000000000..77b415d9b --- /dev/null +++ b/conveyal_update/evaluate_feeds.py @@ -0,0 +1,69 @@ +import os +os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000) +from shared_utils import gtfs_utils_v2 + +from calitp_data_analysis.tables import tbls +from siuba import * +import pandas as pd +import datetime as dt + +import conveyal_vars + +def check_defined_elsewhere(row, df): + ''' + for feeds without service defined, check if the same service is captured in another feed that does include service + ''' + is_defined = ((df >> filter(-_.n.isna())).service_key == row.service_key).any() + row['service_any_feed'] = is_defined + return row + +target_date = conveyal_vars.target_date + +def get_feeds_check_service(): + feeds_on_target = gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=target_date) + # default will use mtc subfeeds (prev Conveyal behavior), can spec customer facing if we wanna switch + + operator_feeds = feeds_on_target.feed_key + trips = ( + tbls.mart_gtfs.fct_scheduled_trips() + >> filter(_.feed_key.isin(operator_feeds), _.service_date == target_date) + >> group_by(_.feed_key) + >> count(_.feed_key) + # >> collect() + # >> mutate(any_trip = True) + ) + service_defined = trips >> collect() + feeds_on_target = feeds_on_target >> left_join(_, service_defined, on = 
'feed_key') >> select(-_.name) + return feeds_on_target + +def attach_transit_services(feeds_on_target: pd.DataFrame): + + target_dt = dt.datetime.combine(target_date, dt.time(0)) + + services = (tbls.mart_transit_database.dim_gtfs_service_data() + >> filter(_._valid_from <= target_dt, _._valid_to > target_dt) + # >> filter(_.gtfs_dataset_key == 'da7e9e09d3eec6c7686adc21c8b28b63') # test with BCT + # >> filter(_.service_key == '5bc7371dca26d74a99be945b18b3174e') + >> select(_.service_key, _.gtfs_dataset_key) + >> collect() + ) + + feeds_on_target = feeds_on_target >> left_join(_, services, on='gtfs_dataset_key') + return feeds_on_target + +def report_undefined(feeds_on_target: pd.DataFrame): + fname = 'no_apparent_service.csv' + undefined = feeds_on_target.apply(check_defined_elsewhere, axis=1, args=[feeds_on_target]) >> filter(-_.service_any_feed) + print('these feeds have no service defined on target date, nor are their services captured in other feeds:') + print(undefined >> select(_.gtfs_dataset_name, _.service_any_feed)) + print(f'saving detailed csv to {fname}') + undefined.to_csv(fname) + return + +if __name__ == '__main__': + + feeds_on_target = get_feeds_check_service() + feeds_on_target = attach_transit_services(feeds_on_target) + report_undefined(feeds_on_target) + feeds_on_target.to_parquet(f'{conveyal_vars.gcs_path}feeds_{target_date.isoformat()}.parquet') + \ No newline at end of file diff --git a/conveyal_update/match_feeds_regions.py b/conveyal_update/match_feeds_regions.py new file mode 100644 index 000000000..50074504a --- /dev/null +++ b/conveyal_update/match_feeds_regions.py @@ -0,0 +1,38 @@ +import os +os.environ['USE_PYGEOS'] = '0' +from shared_utils import gtfs_utils_v2 + +import pandas as pd +import geopandas as gpd +import shapely + +import conveyal_vars + +regions = conveyal_vars.conveyal_regions +target_date = conveyal_vars.target_date +feeds_on_target = pd.read_parquet(f'{conveyal_vars.gcs_path}feeds_{target_date.isoformat()}.parquet') + 
+def create_region_gdf(): + # https://shapely.readthedocs.io/en/stable/reference/shapely.box.html#shapely.box + # xmin, ymin, xmax, ymax + to_bbox = lambda x: [x['west'], x['south'], x['east'], x['north']] + df = pd.DataFrame(regions).transpose().reset_index().rename(columns={'index':'region'}) + df['bbox'] = df.apply(to_bbox, axis=1) + df['geometry'] = df.apply(lambda x: shapely.geometry.box(*x.bbox), axis = 1) + df = df >> select(-_.bbox) + region_gdf = gpd.GeoDataFrame(df, crs=WGS84).to_crs(CA_NAD83Albers) + return region_gdf + +def join_stops_regions(region_gdf: gpd.GeoDataFrame, feeds_on_target: pd.DataFrame): + all_stops = gtfs_utils_v2.get_stops(selected_date=target_date, operator_feeds=feeds_on_target.feed_key).to_crs(CA_NAD83Albers) + region_join = gpd.sjoin(region_gdf, all_stops) + regions_and_feeds = region_join >> distinct(_.region, _.feed_key) + return regions_and_feeds + +if __name__ == '__main__': + + region_gdf = create_region_gdf() + regions_and_feeds = join_stops_regions(region_gdf, feeds_on_target) + regions_and_feeds = regions_and_feeds >> inner_join(_, feeds_on_target >> select(_.feed_key, _.gtfs_dataset_name, _.base64_url, + _.date), on = 'feed_key') + regions_and_feeds.to_parquet(f'{conveyal_vars.gcs_path}regions_feeds_{target_date.isoformat()}.parquet') \ No newline at end of file From 01465af6fb01dad8b35abf04440c99fa5b8cd2f1 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Mon, 6 Nov 2023 22:45:31 +0000 Subject: [PATCH 3/4] test another date --- conveyal_update/conveyal_vars.py | 2 +- conveyal_update/match_feeds_regions.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/conveyal_update/conveyal_vars.py b/conveyal_update/conveyal_vars.py index 0b99f238b..5bed0c33d 100644 --- a/conveyal_update/conveyal_vars.py +++ b/conveyal_update/conveyal_vars.py @@ -1,7 +1,7 @@ import datetime as dt gcs_path = 'gs://calitp-analytics-data/data-analyses/conveyal_update/' -target_date = dt.date(2023, 10, 18) +target_date = 
dt.date(2023, 11, 1) conveyal_regions = {} # boundaries correspond to Conveyal Analysis regions diff --git a/conveyal_update/match_feeds_regions.py b/conveyal_update/match_feeds_regions.py index 50074504a..f269bade2 100644 --- a/conveyal_update/match_feeds_regions.py +++ b/conveyal_update/match_feeds_regions.py @@ -1,8 +1,10 @@ import os os.environ['USE_PYGEOS'] = '0' -from shared_utils import gtfs_utils_v2 +os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000) +from shared_utils import gtfs_utils_v2, geography_utils import pandas as pd +from siuba import * import geopandas as gpd import shapely @@ -20,11 +22,11 @@ def create_region_gdf(): df['bbox'] = df.apply(to_bbox, axis=1) df['geometry'] = df.apply(lambda x: shapely.geometry.box(*x.bbox), axis = 1) df = df >> select(-_.bbox) - region_gdf = gpd.GeoDataFrame(df, crs=WGS84).to_crs(CA_NAD83Albers) + region_gdf = gpd.GeoDataFrame(df, crs=geography_utils.WGS84).to_crs(geography_utils.CA_NAD83Albers) return region_gdf def join_stops_regions(region_gdf: gpd.GeoDataFrame, feeds_on_target: pd.DataFrame): - all_stops = gtfs_utils_v2.get_stops(selected_date=target_date, operator_feeds=feeds_on_target.feed_key).to_crs(CA_NAD83Albers) + all_stops = gtfs_utils_v2.get_stops(selected_date=target_date, operator_feeds=feeds_on_target.feed_key).to_crs(geography_utils.CA_NAD83Albers) region_join = gpd.sjoin(region_gdf, all_stops) regions_and_feeds = region_join >> distinct(_.region, _.feed_key) return regions_and_feeds From 3c2edcfb38a3349c375ac0a2f4ba619c877f3f61 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Mon, 6 Nov 2023 23:17:49 +0000 Subject: [PATCH 4/4] finish scripts and readme --- conveyal_update/Makefile | 4 + conveyal_update/README.md | 11 +- conveyal_update/check_download_feeds.ipynb | 2296 -------------------- conveyal_update/conveyal_vars.py | 5 +- conveyal_update/download_data.py | 51 + conveyal_update/old_PrepGTFSFeeds.ipynb | 265 --- 6 files changed, 67 insertions(+), 2565 deletions(-) create mode 100644 
conveyal_update/Makefile delete mode 100644 conveyal_update/check_download_feeds.ipynb create mode 100644 conveyal_update/download_data.py delete mode 100644 conveyal_update/old_PrepGTFSFeeds.ipynb diff --git a/conveyal_update/Makefile b/conveyal_update/Makefile new file mode 100644 index 000000000..27c885441 --- /dev/null +++ b/conveyal_update/Makefile @@ -0,0 +1,4 @@ +stage_conveyal_update: + python evaluate_feeds.py + python match_feeds_regions.py + python download_data.py \ No newline at end of file diff --git a/conveyal_update/README.md b/conveyal_update/README.md index 7fde19a63..9de4b284e 100644 --- a/conveyal_update/README.md +++ b/conveyal_update/README.md @@ -9,6 +9,11 @@ ## Scripts * Set target date in `conveyal_vars.py`. Region boundaries are also set here, but these should remain static unless the decision is made to use entirely different regions in Conveyal. Target date should be a mid-week day. -* TODO `evaluate_feeds.py` includes functions to check to see which feeds have service defined on the target date, and show feeds without any apparent service, including if that service is apparently captured in another feed. This helps check for potential coverage gaps, likely due to GTFS feed expirations and/or the [publishing future service issue](https://github.com/MobilityData/GTFS_Schedule_Best-Practices/issues/48(https://github.com/MobilityData/GTFS_Schedule_Best-Practices/issues/48). You may have to shift the target date around to find the best overall coverage, and/or manually edit important but missing feeds to define service if reasonable. -* TODO `match_feeds_regions.py` -* TODO `download_data.py`, and additionally generates handoff outputs that can be used to download, crop, and filter OSM data for each region using... 
\ No newline at end of file +* `evaluate_feeds.py` includes functions to check to see which feeds have service defined on the target date, and show feeds without any apparent service, including if that service is apparently captured in another feed. This helps check for potential coverage gaps, likely due to GTFS feed expirations and/or the [publishing future service issue](https://github.com/MobilityData/GTFS_Schedule_Best-Practices/issues/48). You may have to shift the target date around to find the best overall coverage, and/or manually edit important but missing feeds to define service if reasonable. +* `match_feeds_regions.py` matches feeds to Conveyal regions, based on if the feed contains _any_ stops within each region. +* `download_data.py` downloads and zips original GTFS feeds, and additionally generates a shell script that can be used to download, crop, and filter OSM data for each region using Osmosis (not currently able to do so via hub, use other platform). + +## Workflow + +* `make stage_conveyal_update` to run all scripts +* using generated `crop_filter_osm.sh` or other means, [update Conveyal network bundle](https://docs.conveyal.com/prepare-inputs#creating-a-network-bundle) for each region with cropped and filtered OSM data and feeds. 
\ No newline at end of file diff --git a/conveyal_update/check_download_feeds.ipynb b/conveyal_update/check_download_feeds.ipynb deleted file mode 100644 index b15262b68..000000000 --- a/conveyal_update/check_download_feeds.ipynb +++ /dev/null @@ -1,2296 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "c66b1132-56b5-4b11-9318-b8f268ecab30", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "from shared_utils import gtfs_utils_v2\n", - "\n", - "from calitp_data_analysis import get_fs\n", - "from calitp_data_analysis.tables import tbls\n", - "import calitp_data_analysis.magics\n", - "\n", - "from siuba import *\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "\n", - "import datetime as dt\n", - "\n", - "from shared_utils.geography_utils import WGS84, CA_NAD83Albers\n", - "\n", - "from tqdm.notebook import tqdm\n", - "tqdm.pandas()\n", - "\n", - "import conveyal_vars" - ] - }, - { - "cell_type": "markdown", - "id": "5b998348-8988-48f2-93b0-a0abf917f02e", - "metadata": {}, - "source": [ - "# Updating procedure for updating Conveyal network bundles\n", - "\n", - "* much of this could be in DBT?\n", - "* First, select a date (download feeds ingested by the warehouse for that date)\n", - "* Then match all stops to the four Conveyal analysis regions\n", - "* Download _raw_ gtfs schedule feeds (individual feeds), and save them in nested output folder, zipped for download\n", - "* (on local pc) Unzip and upload appropriate region using Conveyal's create network bundle tool" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "296bfae5-a830-4b35-a202-87f0c3143c83", - "metadata": {}, - "outputs": [], - "source": [ - "target_date = conveyal_vars.target_date" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f73da205-2eea-4acf-a649-c1f1e2441ac3", - "metadata": {}, - "outputs": [], - "source": [ - 
"feeds_on_target = gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=target_date)\n", - "# default will use mtc subfeeds (prev Conveyal behavior), can spec customer facing if we wanna switch" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "d2119c99-4e57-4896-909e-4fc0e7c420f7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
keydatefeed_keyfeed_timezonebase64_urlgtfs_dataset_keygtfs_dataset_nameregional_feed_typenametype
0ba5a81692fc0dd50972e84502fc529cc2023-10-18132d2fed3f191ebe86e3de2c7cd31a4aAmerica/Los_AngelesaHR0cHM6Ly90Y3J0YS50cmlwc2hvdC5jb20vdjEvZ3Rmcy...0139b1253130b33adcd4b3a4490530d2TCRTA TripShot ScheduleNoneTCRTA TripShot Scheduleschedule
101d44336e6421ae3cc2be5a0d68a3e4f2023-10-185ad0314c752ed78142d6ebbf7e63f922America/Los_AngelesaHR0cHM6Ly9hcHAubWVjYXRyYW4uY29tL3VyYi93cy9mZW...014d0998350083249a9eb310635548c2SLO ScheduleNoneSLO Scheduleschedule
2e35ed9aa390c162b7039aefe2df4208e2023-10-1843bda252fd929bf57f18a19b780ec33bAmerica/Los_AngelesaHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...015d67d5b75b5cf2b710bbadadfb75f5Bay Area 511 Marin ScheduleRegional SubfeedBay Area 511 Marin Scheduleschedule
\n", - "
" - ], - "text/plain": [ - " key date \\\n", - "0 ba5a81692fc0dd50972e84502fc529cc 2023-10-18 \n", - "1 01d44336e6421ae3cc2be5a0d68a3e4f 2023-10-18 \n", - "2 e35ed9aa390c162b7039aefe2df4208e 2023-10-18 \n", - "\n", - " feed_key feed_timezone \\\n", - "0 132d2fed3f191ebe86e3de2c7cd31a4a America/Los_Angeles \n", - "1 5ad0314c752ed78142d6ebbf7e63f922 America/Los_Angeles \n", - "2 43bda252fd929bf57f18a19b780ec33b America/Los_Angeles \n", - "\n", - " base64_url \\\n", - "0 aHR0cHM6Ly90Y3J0YS50cmlwc2hvdC5jb20vdjEvZ3Rmcy... \n", - "1 aHR0cHM6Ly9hcHAubWVjYXRyYW4uY29tL3VyYi93cy9mZW... \n", - "2 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... \n", - "\n", - " gtfs_dataset_key gtfs_dataset_name \\\n", - "0 0139b1253130b33adcd4b3a4490530d2 TCRTA TripShot Schedule \n", - "1 014d0998350083249a9eb310635548c2 SLO Schedule \n", - "2 015d67d5b75b5cf2b710bbadadfb75f5 Bay Area 511 Marin Schedule \n", - "\n", - " regional_feed_type name type \n", - "0 None TCRTA TripShot Schedule schedule \n", - "1 None SLO Schedule schedule \n", - "2 Regional Subfeed Bay Area 511 Marin Schedule schedule " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "feeds_on_target >> head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "99a28abf-f771-4873-861c-e613a07d099e", - "metadata": {}, - "outputs": [], - "source": [ - "operator_feeds = feeds_on_target.feed_key" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "53732ff0-4ba5-4b2b-bfb4-0d2f4a61de55", - "metadata": {}, - "outputs": [], - "source": [ - "trips = (\n", - " tbls.mart_gtfs.fct_scheduled_trips()\n", - " >> filter(_.feed_key.isin(operator_feeds), _.service_date == target_date)\n", - " >> group_by(_.feed_key)\n", - " >> count(_.feed_key)\n", - " # >> collect()\n", - " # >> mutate(any_trip = True)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b3de2066-875f-4729-a935-3dc4b0988e31", - "metadata": {}, - "outputs": 
[], - "source": [ - "service_defined = trips >> collect()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "5aabc27c-d744-4811-bd4f-0e524c994293", - "metadata": {}, - "outputs": [], - "source": [ - "target_dt = dt.datetime.combine(target_date, dt.time(0))\n", - "\n", - "services = (tbls.mart_transit_database.dim_gtfs_service_data()\n", - " >> filter(_._valid_from <= target_dt, _._valid_to > target_dt)\n", - " # >> filter(_.gtfs_dataset_key == 'da7e9e09d3eec6c7686adc21c8b28b63') # test with BCT\n", - " # >> filter(_.service_key == '5bc7371dca26d74a99be945b18b3174e')\n", - " >> select(_.service_key, _.gtfs_dataset_key)\n", - " >> collect()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "82b1cd8d-eae8-4014-be31-6830e83f0734", - "metadata": {}, - "outputs": [], - "source": [ - "feeds_on_target = (feeds_on_target >> left_join(_, service_defined, on = 'feed_key')\n", - " >> select(-_.name)\n", - " >> left_join(_, services, on='gtfs_dataset_key')\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f8434c0f-4699-422f-95ae-27d6b6e2dffd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
keydatefeed_keyfeed_timezonebase64_urlgtfs_dataset_keygtfs_dataset_nameregional_feed_typetypenservice_key
62c927bb3d92c13a63a7900caa77f4bee02023-10-18a9a4672431e928089176517c3297db66America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...4a9e6a8b8db445bc9fc3cf398ded67b1Glendora ScheduleNonescheduleNaNf157fa35c6207fceeb9d883a62caa016
64fd77340aba25ef0767ecebcba3f0d0de2023-10-18f561f3f554f4ef3f22121116be67b2f0US/PacificaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...4b84ffbdc2b6abb171a5df6ce8f06797Wasco DAR ScheduleNonescheduleNaN570f0f680cba9575db030462cbba50d1
102bdd83751c48788bf23a2a9c571bd0d5e2023-10-1854c22d251df6fdaf2b3c1d699f4a739bAmerica/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...73e1cb24210dab4feb13fbf2924939d2Maywood ScheduleNonescheduleNaN7535ce68155dde3462a1248edc9919e4
111a1647ad5f590c79d0b5f185b098560642023-10-18e4af9a8cc80c88b868f066824d992d9fAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...82a0ba5e020f951f6e780761537ef12bStanislaus FlexNonescheduleNaN98322f54467a89178241a42a03c93c66
113fcb8bd67a2f3bfdac5d819feb5b14f042023-10-1807feb14721d9fe332a8e7fb37bf625ddAmerica/New_YorkaHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw85e9d75e8430f242e9f5600d2f5c6964G Trans ScheduleNonescheduleNaN3e5d9847cfc0bcefbe11f601ad358690
115f4e59cc5e52efaa8fad075e7aa9d693e2023-10-18a43c08f7ed73ee9e88204a260933b461America/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...88780135c261b5b1391afdd5d562e5baTracy ScheduleNonescheduleNaN242716bc3da3ddd600eeba2cd310fa1c
1212353fa44db8fac09e9b2a994eb5799532023-10-186cf65a84397884d0ffb44db5b8a08bd4America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...8e3f49cfd30b44746a0724ca60e596caInglewood ScheduleNonescheduleNaNc4ae3c5830ed2661c7b76bb8c946fc72
13402ddc9b2168f50ae0bdce2fa101ce9582023-10-18e68cd2ffe7bb1760d28f3a94e3a31dacAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...9ca0bbfd2ada3d686f1c3a136c21eafcSan Juan Capistrano ScheduleNonescheduleNaN2de8641db93b73d3c99194f73c097d37
154b9b6949a4dd7548ea9733d62099fd0fa2023-10-18696beb3cb2e375f8524ae18eff0d041dAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...b62b4267caca504e1057c52a91611c16Eastern Sierra FlexNonescheduleNaNe6ef2ac8d134a1ba042ce6df1a4b1efd
1752a6176818feb911d0d444d7268594cc32023-10-18062563b11ac99ddec6d3bec6f613b78dAmerica/Los_AngelesaHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym...c599bcb93f9c039473932479eb082d7dMorro Bay ScheduleNonescheduleNaNa3dca9bf042f372998a3251c3f16053c
18594e92e15f61f2e04ba18e4d54e98bb9b2023-10-181b77ef49f5bc70038cbf15e4f5f98477America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...cd08875e7d95ed218f98b7694ac8ea3fCompton ScheduleNonescheduleNaN968c9d086e00b106991c299dc47b5c12
1866c486bf9f5ff3c0fbceb3c091f8ae0092023-10-18fe662c95bddfb6e5fd75cb0afbb85cd8America/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...cde2b7a63ab7bb33141c8b02b001ea0fTCAT FlexNonescheduleNaNd5969d0cb382b296935540c720eedc6a
19437f0266a8efb77b896d7dc17b0d154832023-10-18e055f64de6bf34b0d31d67b6d6e31dfbAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...da71c7121e987c3cd333d48ee24e23efRedwood Coast FlexNonescheduleNaNa5659c824eeba107b24ffdcbeb005d15
195cac869872a0a2cc693f3fe371dbef0642023-10-18d95f2f26bbf4846e4eb84d352fb0990dAmerica/Los_AngelesaHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW...da7e9e09d3eec6c7686adc21c8b28b63Beach Cities ScheduleNonescheduleNaN5bc7371dca26d74a99be945b18b3174e
21598a8b1b5e7cb731c206dfc8eb105b4d72023-10-18d5b1960f462b2798ad3d2bbd7a77a01eAmerica/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...f61fbdf46f6ea735259b9d0c48139ebeArtesia ScheduleNonescheduleNaNdb2ef944fe6d4024e62c6cf638091fce
22652f018f92d2bc0d86e60b4e3fa5cea482023-10-18d1fd5c603d131faf826423a8f00aa7acAmerica/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...ff51495a24cf286ff2f7bc9b3401f855Blossom Express ScheduleNonescheduleNaN525d6d12e42890a9df6d7c95db907b0d
\n", - "
" - ], - "text/plain": [ - " key date \\\n", - "62 c927bb3d92c13a63a7900caa77f4bee0 2023-10-18 \n", - "64 fd77340aba25ef0767ecebcba3f0d0de 2023-10-18 \n", - "102 bdd83751c48788bf23a2a9c571bd0d5e 2023-10-18 \n", - "111 a1647ad5f590c79d0b5f185b09856064 2023-10-18 \n", - "113 fcb8bd67a2f3bfdac5d819feb5b14f04 2023-10-18 \n", - "115 f4e59cc5e52efaa8fad075e7aa9d693e 2023-10-18 \n", - "121 2353fa44db8fac09e9b2a994eb579953 2023-10-18 \n", - "134 02ddc9b2168f50ae0bdce2fa101ce958 2023-10-18 \n", - "154 b9b6949a4dd7548ea9733d62099fd0fa 2023-10-18 \n", - "175 2a6176818feb911d0d444d7268594cc3 2023-10-18 \n", - "185 94e92e15f61f2e04ba18e4d54e98bb9b 2023-10-18 \n", - "186 6c486bf9f5ff3c0fbceb3c091f8ae009 2023-10-18 \n", - "194 37f0266a8efb77b896d7dc17b0d15483 2023-10-18 \n", - "195 cac869872a0a2cc693f3fe371dbef064 2023-10-18 \n", - "215 98a8b1b5e7cb731c206dfc8eb105b4d7 2023-10-18 \n", - "226 52f018f92d2bc0d86e60b4e3fa5cea48 2023-10-18 \n", - "\n", - " feed_key feed_timezone \\\n", - "62 a9a4672431e928089176517c3297db66 America/Los_Angeles \n", - "64 f561f3f554f4ef3f22121116be67b2f0 US/Pacific \n", - "102 54c22d251df6fdaf2b3c1d699f4a739b America/Los_Angeles \n", - "111 e4af9a8cc80c88b868f066824d992d9f America/Los_Angeles \n", - "113 07feb14721d9fe332a8e7fb37bf625dd America/New_York \n", - "115 a43c08f7ed73ee9e88204a260933b461 America/Los_Angeles \n", - "121 6cf65a84397884d0ffb44db5b8a08bd4 America/Los_Angeles \n", - "134 e68cd2ffe7bb1760d28f3a94e3a31dac America/Los_Angeles \n", - "154 696beb3cb2e375f8524ae18eff0d041d America/Los_Angeles \n", - "175 062563b11ac99ddec6d3bec6f613b78d America/Los_Angeles \n", - "185 1b77ef49f5bc70038cbf15e4f5f98477 America/Los_Angeles \n", - "186 fe662c95bddfb6e5fd75cb0afbb85cd8 America/Los_Angeles \n", - "194 e055f64de6bf34b0d31d67b6d6e31dfb America/Los_Angeles \n", - "195 d95f2f26bbf4846e4eb84d352fb0990d America/Los_Angeles \n", - "215 d5b1960f462b2798ad3d2bbd7a77a01e America/Los_Angeles \n", - "226 d1fd5c603d131faf826423a8f00aa7ac 
America/Los_Angeles \n", - "\n", - " base64_url \\\n", - "62 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "64 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "102 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "111 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "113 aHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw \n", - "115 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", - "121 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "134 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "154 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "175 aHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym... \n", - "185 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "186 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "194 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "195 aHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW... \n", - "215 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "226 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... 
\n", - "\n", - " gtfs_dataset_key gtfs_dataset_name \\\n", - "62 4a9e6a8b8db445bc9fc3cf398ded67b1 Glendora Schedule \n", - "64 4b84ffbdc2b6abb171a5df6ce8f06797 Wasco DAR Schedule \n", - "102 73e1cb24210dab4feb13fbf2924939d2 Maywood Schedule \n", - "111 82a0ba5e020f951f6e780761537ef12b Stanislaus Flex \n", - "113 85e9d75e8430f242e9f5600d2f5c6964 G Trans Schedule \n", - "115 88780135c261b5b1391afdd5d562e5ba Tracy Schedule \n", - "121 8e3f49cfd30b44746a0724ca60e596ca Inglewood Schedule \n", - "134 9ca0bbfd2ada3d686f1c3a136c21eafc San Juan Capistrano Schedule \n", - "154 b62b4267caca504e1057c52a91611c16 Eastern Sierra Flex \n", - "175 c599bcb93f9c039473932479eb082d7d Morro Bay Schedule \n", - "185 cd08875e7d95ed218f98b7694ac8ea3f Compton Schedule \n", - "186 cde2b7a63ab7bb33141c8b02b001ea0f TCAT Flex \n", - "194 da71c7121e987c3cd333d48ee24e23ef Redwood Coast Flex \n", - "195 da7e9e09d3eec6c7686adc21c8b28b63 Beach Cities Schedule \n", - "215 f61fbdf46f6ea735259b9d0c48139ebe Artesia Schedule \n", - "226 ff51495a24cf286ff2f7bc9b3401f855 Blossom Express Schedule \n", - "\n", - " regional_feed_type type n service_key \n", - "62 None schedule NaN f157fa35c6207fceeb9d883a62caa016 \n", - "64 None schedule NaN 570f0f680cba9575db030462cbba50d1 \n", - "102 None schedule NaN 7535ce68155dde3462a1248edc9919e4 \n", - "111 None schedule NaN 98322f54467a89178241a42a03c93c66 \n", - "113 None schedule NaN 3e5d9847cfc0bcefbe11f601ad358690 \n", - "115 None schedule NaN 242716bc3da3ddd600eeba2cd310fa1c \n", - "121 None schedule NaN c4ae3c5830ed2661c7b76bb8c946fc72 \n", - "134 None schedule NaN 2de8641db93b73d3c99194f73c097d37 \n", - "154 None schedule NaN e6ef2ac8d134a1ba042ce6df1a4b1efd \n", - "175 None schedule NaN a3dca9bf042f372998a3251c3f16053c \n", - "185 None schedule NaN 968c9d086e00b106991c299dc47b5c12 \n", - "186 None schedule NaN d5969d0cb382b296935540c720eedc6a \n", - "194 None schedule NaN a5659c824eeba107b24ffdcbeb005d15 \n", - "195 None schedule NaN 
5bc7371dca26d74a99be945b18b3174e \n", - "215 None schedule NaN db2ef944fe6d4024e62c6cf638091fce \n", - "226 None schedule NaN 525d6d12e42890a9df6d7c95db907b0d " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# feeds without any service defined on target date\n", - "# TODO lookback/recursion?\n", - "# TODO column for \"service has service defined in another feed, ex. BCT -> GMV BCT\"\n", - "feeds_on_target >> filter(_.n.isna())" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "2b81663d-1513-430f-9471-092801753c56", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "((feeds_on_target >> filter(-_.n.isna())).service_key == '5bc7371dca26d74a99be945b18b3174e').any()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "62c97603-a1da-48eb-8515-ea0c5c8346df", - "metadata": {}, - "outputs": [], - "source": [ - "def check_defined_elsewhere(row, df):\n", - " '''\n", - " for feeds without service defined, check if the same service is captured in another feed that does include service\n", - " '''\n", - " is_defined = ((df >> filter(-_.n.isna())).service_key == row.service_key).any()\n", - " row['service_any_feed'] = is_defined\n", - " return row" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "fd5991cf-651d-4eaa-880d-9b4a6fe81ad2", - "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
keydatefeed_keyfeed_timezonebase64_urlgtfs_dataset_keygtfs_dataset_nameregional_feed_typetypenservice_keyservice_any_feed
62c927bb3d92c13a63a7900caa77f4bee02023-10-18a9a4672431e928089176517c3297db66America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...4a9e6a8b8db445bc9fc3cf398ded67b1Glendora ScheduleNonescheduleNaNf157fa35c6207fceeb9d883a62caa016False
64fd77340aba25ef0767ecebcba3f0d0de2023-10-18f561f3f554f4ef3f22121116be67b2f0US/PacificaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...4b84ffbdc2b6abb171a5df6ce8f06797Wasco DAR ScheduleNonescheduleNaN570f0f680cba9575db030462cbba50d1False
102bdd83751c48788bf23a2a9c571bd0d5e2023-10-1854c22d251df6fdaf2b3c1d699f4a739bAmerica/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...73e1cb24210dab4feb13fbf2924939d2Maywood ScheduleNonescheduleNaN7535ce68155dde3462a1248edc9919e4False
111a1647ad5f590c79d0b5f185b098560642023-10-18e4af9a8cc80c88b868f066824d992d9fAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...82a0ba5e020f951f6e780761537ef12bStanislaus FlexNonescheduleNaN98322f54467a89178241a42a03c93c66False
113fcb8bd67a2f3bfdac5d819feb5b14f042023-10-1807feb14721d9fe332a8e7fb37bf625ddAmerica/New_YorkaHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw85e9d75e8430f242e9f5600d2f5c6964G Trans ScheduleNonescheduleNaN3e5d9847cfc0bcefbe11f601ad358690False
115f4e59cc5e52efaa8fad075e7aa9d693e2023-10-18a43c08f7ed73ee9e88204a260933b461America/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...88780135c261b5b1391afdd5d562e5baTracy ScheduleNonescheduleNaN242716bc3da3ddd600eeba2cd310fa1cFalse
1212353fa44db8fac09e9b2a994eb5799532023-10-186cf65a84397884d0ffb44db5b8a08bd4America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...8e3f49cfd30b44746a0724ca60e596caInglewood ScheduleNonescheduleNaNc4ae3c5830ed2661c7b76bb8c946fc72False
13402ddc9b2168f50ae0bdce2fa101ce9582023-10-18e68cd2ffe7bb1760d28f3a94e3a31dacAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...9ca0bbfd2ada3d686f1c3a136c21eafcSan Juan Capistrano ScheduleNonescheduleNaN2de8641db93b73d3c99194f73c097d37False
154b9b6949a4dd7548ea9733d62099fd0fa2023-10-18696beb3cb2e375f8524ae18eff0d041dAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...b62b4267caca504e1057c52a91611c16Eastern Sierra FlexNonescheduleNaNe6ef2ac8d134a1ba042ce6df1a4b1efdFalse
1752a6176818feb911d0d444d7268594cc32023-10-18062563b11ac99ddec6d3bec6f613b78dAmerica/Los_AngelesaHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym...c599bcb93f9c039473932479eb082d7dMorro Bay ScheduleNonescheduleNaNa3dca9bf042f372998a3251c3f16053cFalse
18594e92e15f61f2e04ba18e4d54e98bb9b2023-10-181b77ef49f5bc70038cbf15e4f5f98477America/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...cd08875e7d95ed218f98b7694ac8ea3fCompton ScheduleNonescheduleNaN968c9d086e00b106991c299dc47b5c12False
1866c486bf9f5ff3c0fbceb3c091f8ae0092023-10-18fe662c95bddfb6e5fd75cb0afbb85cd8America/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...cde2b7a63ab7bb33141c8b02b001ea0fTCAT FlexNonescheduleNaNd5969d0cb382b296935540c720eedc6aFalse
19437f0266a8efb77b896d7dc17b0d154832023-10-18e055f64de6bf34b0d31d67b6d6e31dfbAmerica/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...da71c7121e987c3cd333d48ee24e23efRedwood Coast FlexNonescheduleNaNa5659c824eeba107b24ffdcbeb005d15False
195cac869872a0a2cc693f3fe371dbef0642023-10-18d95f2f26bbf4846e4eb84d352fb0990dAmerica/Los_AngelesaHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW...da7e9e09d3eec6c7686adc21c8b28b63Beach Cities ScheduleNonescheduleNaN5bc7371dca26d74a99be945b18b3174eTrue
21598a8b1b5e7cb731c206dfc8eb105b4d72023-10-18d5b1960f462b2798ad3d2bbd7a77a01eAmerica/Los_AngelesaHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...f61fbdf46f6ea735259b9d0c48139ebeArtesia ScheduleNonescheduleNaNdb2ef944fe6d4024e62c6cf638091fceFalse
22652f018f92d2bc0d86e60b4e3fa5cea482023-10-18d1fd5c603d131faf826423a8f00aa7acAmerica/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...ff51495a24cf286ff2f7bc9b3401f855Blossom Express ScheduleNonescheduleNaN525d6d12e42890a9df6d7c95db907b0dFalse
\n", - "
" - ], - "text/plain": [ - " key date \\\n", - "62 c927bb3d92c13a63a7900caa77f4bee0 2023-10-18 \n", - "64 fd77340aba25ef0767ecebcba3f0d0de 2023-10-18 \n", - "102 bdd83751c48788bf23a2a9c571bd0d5e 2023-10-18 \n", - "111 a1647ad5f590c79d0b5f185b09856064 2023-10-18 \n", - "113 fcb8bd67a2f3bfdac5d819feb5b14f04 2023-10-18 \n", - "115 f4e59cc5e52efaa8fad075e7aa9d693e 2023-10-18 \n", - "121 2353fa44db8fac09e9b2a994eb579953 2023-10-18 \n", - "134 02ddc9b2168f50ae0bdce2fa101ce958 2023-10-18 \n", - "154 b9b6949a4dd7548ea9733d62099fd0fa 2023-10-18 \n", - "175 2a6176818feb911d0d444d7268594cc3 2023-10-18 \n", - "185 94e92e15f61f2e04ba18e4d54e98bb9b 2023-10-18 \n", - "186 6c486bf9f5ff3c0fbceb3c091f8ae009 2023-10-18 \n", - "194 37f0266a8efb77b896d7dc17b0d15483 2023-10-18 \n", - "195 cac869872a0a2cc693f3fe371dbef064 2023-10-18 \n", - "215 98a8b1b5e7cb731c206dfc8eb105b4d7 2023-10-18 \n", - "226 52f018f92d2bc0d86e60b4e3fa5cea48 2023-10-18 \n", - "\n", - " feed_key feed_timezone \\\n", - "62 a9a4672431e928089176517c3297db66 America/Los_Angeles \n", - "64 f561f3f554f4ef3f22121116be67b2f0 US/Pacific \n", - "102 54c22d251df6fdaf2b3c1d699f4a739b America/Los_Angeles \n", - "111 e4af9a8cc80c88b868f066824d992d9f America/Los_Angeles \n", - "113 07feb14721d9fe332a8e7fb37bf625dd America/New_York \n", - "115 a43c08f7ed73ee9e88204a260933b461 America/Los_Angeles \n", - "121 6cf65a84397884d0ffb44db5b8a08bd4 America/Los_Angeles \n", - "134 e68cd2ffe7bb1760d28f3a94e3a31dac America/Los_Angeles \n", - "154 696beb3cb2e375f8524ae18eff0d041d America/Los_Angeles \n", - "175 062563b11ac99ddec6d3bec6f613b78d America/Los_Angeles \n", - "185 1b77ef49f5bc70038cbf15e4f5f98477 America/Los_Angeles \n", - "186 fe662c95bddfb6e5fd75cb0afbb85cd8 America/Los_Angeles \n", - "194 e055f64de6bf34b0d31d67b6d6e31dfb America/Los_Angeles \n", - "195 d95f2f26bbf4846e4eb84d352fb0990d America/Los_Angeles \n", - "215 d5b1960f462b2798ad3d2bbd7a77a01e America/Los_Angeles \n", - "226 d1fd5c603d131faf826423a8f00aa7ac 
America/Los_Angeles \n", - "\n", - " base64_url \\\n", - "62 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "64 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "102 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "111 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "113 aHR0cDovL3JpZGVndHJhbnMuY29tL2d0ZnMuemlw \n", - "115 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", - "121 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "134 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "154 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "175 aHR0cHM6Ly9tamNhY3Rpb24uY29tL01KQ19HVEZTX1B1Ym... \n", - "185 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "186 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "194 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "195 aHR0cHM6Ly93d3cucmVkb25kby5vcmcvY2l2aWNheC9maW... \n", - "215 aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW... \n", - "226 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... 
\n", - "\n", - " gtfs_dataset_key gtfs_dataset_name \\\n", - "62 4a9e6a8b8db445bc9fc3cf398ded67b1 Glendora Schedule \n", - "64 4b84ffbdc2b6abb171a5df6ce8f06797 Wasco DAR Schedule \n", - "102 73e1cb24210dab4feb13fbf2924939d2 Maywood Schedule \n", - "111 82a0ba5e020f951f6e780761537ef12b Stanislaus Flex \n", - "113 85e9d75e8430f242e9f5600d2f5c6964 G Trans Schedule \n", - "115 88780135c261b5b1391afdd5d562e5ba Tracy Schedule \n", - "121 8e3f49cfd30b44746a0724ca60e596ca Inglewood Schedule \n", - "134 9ca0bbfd2ada3d686f1c3a136c21eafc San Juan Capistrano Schedule \n", - "154 b62b4267caca504e1057c52a91611c16 Eastern Sierra Flex \n", - "175 c599bcb93f9c039473932479eb082d7d Morro Bay Schedule \n", - "185 cd08875e7d95ed218f98b7694ac8ea3f Compton Schedule \n", - "186 cde2b7a63ab7bb33141c8b02b001ea0f TCAT Flex \n", - "194 da71c7121e987c3cd333d48ee24e23ef Redwood Coast Flex \n", - "195 da7e9e09d3eec6c7686adc21c8b28b63 Beach Cities Schedule \n", - "215 f61fbdf46f6ea735259b9d0c48139ebe Artesia Schedule \n", - "226 ff51495a24cf286ff2f7bc9b3401f855 Blossom Express Schedule \n", - "\n", - " regional_feed_type type n service_key \\\n", - "62 None schedule NaN f157fa35c6207fceeb9d883a62caa016 \n", - "64 None schedule NaN 570f0f680cba9575db030462cbba50d1 \n", - "102 None schedule NaN 7535ce68155dde3462a1248edc9919e4 \n", - "111 None schedule NaN 98322f54467a89178241a42a03c93c66 \n", - "113 None schedule NaN 3e5d9847cfc0bcefbe11f601ad358690 \n", - "115 None schedule NaN 242716bc3da3ddd600eeba2cd310fa1c \n", - "121 None schedule NaN c4ae3c5830ed2661c7b76bb8c946fc72 \n", - "134 None schedule NaN 2de8641db93b73d3c99194f73c097d37 \n", - "154 None schedule NaN e6ef2ac8d134a1ba042ce6df1a4b1efd \n", - "175 None schedule NaN a3dca9bf042f372998a3251c3f16053c \n", - "185 None schedule NaN 968c9d086e00b106991c299dc47b5c12 \n", - "186 None schedule NaN d5969d0cb382b296935540c720eedc6a \n", - "194 None schedule NaN a5659c824eeba107b24ffdcbeb005d15 \n", - "195 None schedule NaN 
5bc7371dca26d74a99be945b18b3174e \n", - "215 None schedule NaN db2ef944fe6d4024e62c6cf638091fce \n", - "226 None schedule NaN 525d6d12e42890a9df6d7c95db907b0d \n", - "\n", - " service_any_feed \n", - "62 False \n", - "64 False \n", - "102 False \n", - "111 False \n", - "113 False \n", - "115 False \n", - "121 False \n", - "134 False \n", - "154 False \n", - "175 False \n", - "185 False \n", - "186 False \n", - "194 False \n", - "195 True \n", - "215 False \n", - "226 False " - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "feeds_on_target.apply(check_defined_elsewhere, axis=1, args=[feeds_on_target]) >> filter(_.n.isna())" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "467b6dbb-6c7c-4a8e-8a34-e5d2f478524a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(227, 11)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "feeds_on_target.shape" - ] - }, - { - "cell_type": "markdown", - "id": "33cedf86-ce2e-48c9-b496-3c9e42693aca", - "metadata": {}, - "source": [ - "## Stops --> Region matching\n", - "\n", - "* many to many OK -- feeds crossing boundaries shall appear in both regions" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "ae9afaa4-64da-4b62-a22d-d60671278761", - "metadata": {}, - "outputs": [], - "source": [ - "regions = conveyal_vars.conveyal_regions" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "1f021355-1f40-4121-8a6d-32eeb4a1c52e", - "metadata": {}, - "outputs": [], - "source": [ - "import shapely" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "87642e47-80eb-4a21-81c4-e41f467a7c8d", - "metadata": {}, - "outputs": [], - "source": [ - "# https://shapely.readthedocs.io/en/stable/reference/shapely.box.html#shapely.box\n", - "# xmin, ymin, xmax, ymax\n", - "to_bbox = lambda x: [x['west'], x['south'], x['east'], x['north']]" - ] - 
}, - { - "cell_type": "code", - "execution_count": 25, - "id": "328b7a62-e5b7-47b8-b0e2-21f971b8e9e0", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame(regions).transpose().reset_index().rename(columns={'index':'region'})" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "5b8d6fd6-6847-4121-a6c8-212a7d01f5b7", - "metadata": {}, - "outputs": [], - "source": [ - "df['bbox'] = df.apply(to_bbox, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "4a3502e5-7240-4bb1-bf33-28c96c316c3a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regionnorthsoutheastwestbbox
0norcal42.0390939.07038-119.60541-124.49158[-124.49158, 39.07038, -119.60541, 42.03909]
1central39.6416535.87347-117.53174-123.83789[-123.83789, 35.87347, -117.53174, 39.64165]
2socal35.8935032.50050-114.13121-121.46759[-121.46759, 32.5005, -114.13121, 35.8935]
3mojave37.8162934.89945-114.59015-118.38043[-118.38043, 34.89945, -114.59015, 37.81629]
\n", - "
" - ], - "text/plain": [ - " region north south east west \\\n", - "0 norcal 42.03909 39.07038 -119.60541 -124.49158 \n", - "1 central 39.64165 35.87347 -117.53174 -123.83789 \n", - "2 socal 35.89350 32.50050 -114.13121 -121.46759 \n", - "3 mojave 37.81629 34.89945 -114.59015 -118.38043 \n", - "\n", - " bbox \n", - "0 [-124.49158, 39.07038, -119.60541, 42.03909] \n", - "1 [-123.83789, 35.87347, -117.53174, 39.64165] \n", - "2 [-121.46759, 32.5005, -114.13121, 35.8935] \n", - "3 [-118.38043, 34.89945, -114.59015, 37.81629] " - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "97d117b1-8835-473a-893b-ead256f4fba5", - "metadata": {}, - "outputs": [], - "source": [ - "df['geometry'] = df.apply(lambda x: shapely.geometry.box(*x.bbox), axis = 1)\n", - "df = df >> select(-_.bbox)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "6b9402e0-24de-43c4-b04e-0c903a4657d6", - "metadata": {}, - "outputs": [], - "source": [ - "region_gdf = gpd.GeoDataFrame(df, crs=WGS84).to_crs(CA_NAD83Albers)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "01160e41-81f6-45b8-ae76-86041b8f478f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Make this Notebook Trusted to load map: File -> Trust Notebook
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "region_gdf.explore()" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "98644c75-cd9c-4032-8406-2f4a14a6edfd", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.9/site-packages/sqlalchemy_bigquery/_types.py:101: SAWarning: Did not recognize type 'GEOGRAPHY' of column 'pt_geom'\n", - " sqlalchemy.util.warn(\n" - ] - } - ], - "source": [ - "all_stops = gtfs_utils_v2.get_stops(selected_date=target_date, operator_feeds=feeds_on_target.feed_key).to_crs(CA_NAD83Albers)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "64a57fbd-26da-47d0-b549-254961afd70d", - "metadata": {}, - "outputs": [], - "source": [ - "all_stops = all_stops >> select(_.key, _.feed_key, _.stop_id, _.geometry, _.stop_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "d1f999fc-9295-4478-85cf-d5cc6e0b5956", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(88593, 5)" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "all_stops.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "9a15d156-683f-4f15-8638-67054462baa0", - "metadata": {}, - "outputs": [], - "source": [ - "test_join = gpd.sjoin(region_gdf, all_stops)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "306c2065-0c23-43a5-aa47-c2950615cccf", - "metadata": {}, - "outputs": [], - "source": [ - "# confirm that overlaps are counted\n", - "# overlaps = test_join >> count(_.key) >> filter(_.n > 1)\n", - "# (all_stops >> inner_join(_, overlaps, on = 'key')).explore()" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "02847606-a347-4f48-827a-eca90ae45207", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regionnorthsoutheastwestgeometryindex_rightkeyfeed_keystop_idstop_name
0norcal42.0390939.07038-119.60541-124.49158POLYGON ((34109.459 117232.679, 32737.676 4468...7871164fa4388fc2492d469f6049e40c14f207bd3d8c32eda4869c4d7f8bf2aec5bb0db86bc0b-d3af-4163-a30a-6ece57770dbeColfax (Standlock Bottle Shop)
1central39.6416535.87347-117.53174-123.83789POLYGON ((222589.608 -235276.505, 211687.216 1...7871164fa4388fc2492d469f6049e40c14f207bd3d8c32eda4869c4d7f8bf2aec5bb0db86bc0b-d3af-4163-a30a-6ece57770dbeColfax (Standlock Bottle Shop)
0norcal42.0390939.07038-119.60541-124.49158POLYGON ((34109.459 117232.679, 32737.676 4468...77704fabfb6cc39802b79ce2bb67e4fa3cd45102b5149d072a697aeda213b8b72811b4001Colfax Depot
\n", - "
" - ], - "text/plain": [ - " region north south east west \\\n", - "0 norcal 42.03909 39.07038 -119.60541 -124.49158 \n", - "1 central 39.64165 35.87347 -117.53174 -123.83789 \n", - "0 norcal 42.03909 39.07038 -119.60541 -124.49158 \n", - "\n", - " geometry index_right \\\n", - "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 78711 \n", - "1 POLYGON ((222589.608 -235276.505, 211687.216 1... 78711 \n", - "0 POLYGON ((34109.459 117232.679, 32737.676 4468... 77704 \n", - "\n", - " key feed_key \\\n", - "0 64fa4388fc2492d469f6049e40c14f20 7bd3d8c32eda4869c4d7f8bf2aec5bb0 \n", - "1 64fa4388fc2492d469f6049e40c14f20 7bd3d8c32eda4869c4d7f8bf2aec5bb0 \n", - "0 fabfb6cc39802b79ce2bb67e4fa3cd45 102b5149d072a697aeda213b8b72811b \n", - "\n", - " stop_id stop_name \n", - "0 db86bc0b-d3af-4163-a30a-6ece57770dbe Colfax (Standlock Bottle Shop) \n", - "1 db86bc0b-d3af-4163-a30a-6ece57770dbe Colfax (Standlock Bottle Shop) \n", - "0 4001 Colfax Depot " - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_join >> head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "40afc409-7952-4a92-8f6c-0e4757d14b61", - "metadata": {}, - "outputs": [], - "source": [ - "regions_and_feeds = test_join >> distinct(_.region, _.feed_key)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "85306aab-a9cf-4141-aa4b-5a4aa937e6ec", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(197, 2)" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "regions_and_feeds.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "83802555-c94e-4c19-a842-6c0e5d033790", - "metadata": {}, - "outputs": [], - "source": [ - "regions_and_feeds = regions_and_feeds >> inner_join(_, feeds_on_target >> select(_.feed_key, _.gtfs_dataset_name, _.base64_url,\n", - " _.date), on = 'feed_key')" - ] - }, - { - "cell_type": "code", - 
"execution_count": 40, - "id": "457f1e27-5c02-4a29-a524-fbc62984bf27", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regionfeed_keygtfs_dataset_namebase64_urldate
0norcal7bd3d8c32eda4869c4d7f8bf2aec5bb0Flixbus ScheduleaHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm...2023-10-18
1norcal7bd3d8c32eda4869c4d7f8bf2aec5bb0Flixbus ScheduleaHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm...2023-10-18
2central7bd3d8c32eda4869c4d7f8bf2aec5bb0Flixbus ScheduleaHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm...2023-10-18
\n", - "
" - ], - "text/plain": [ - " region feed_key gtfs_dataset_name \\\n", - "0 norcal 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", - "1 norcal 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", - "2 central 7bd3d8c32eda4869c4d7f8bf2aec5bb0 Flixbus Schedule \n", - "\n", - " base64_url date \n", - "0 aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm... 2023-10-18 \n", - "1 aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm... 2023-10-18 \n", - "2 aHR0cDovL2d0ZnMuZ2lzLmZsaXgudGVjaC9ndGZzX2dlbm... 2023-10-18 " - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "regions_and_feeds >> head(3)" - ] - }, - { - "cell_type": "markdown", - "id": "1b42f3f8-c14a-41ff-ab0f-9cc63129a3f1", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "## Validation steps here!\n", - "\n", - "* try loading all feeds without validating for now, circle back once we know what Conveyal errors on?" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "919ebd9e-e64f-445f-965d-faacb29b4190", - "metadata": {}, - "outputs": [], - "source": [ - "validation_tbl = tbls.mart_gtfs_quality.fct_daily_schedule_feed_validation_notices()" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "d6aef82c-2ada-4cd4-80fa-1a0edae7d056", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "datetime.date(2023, 9, 13)" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_date" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "6a43714b-6eb2-46b0-96a4-46878022c5f1", - "metadata": {}, - "outputs": [], - "source": [ - "## apparently busted since moving to v4.1 on Sep 1??\n", - "validation_df = (validation_tbl >> filter(_.date == target_date)\n", - " >> filter(_.severity == 'ERROR',\n", - " _.total_notices > 0)\n", - " >> distinct(_.date, _.base64_url, _.severity,\n", - " _.total_notices)\n", - 
" ) >> collect()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "26fe6c3b-7533-4a91-ae84-236c8f29320b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datebase64_urlseveritytotal_notices
02023-09-13aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...ERROR176
12023-09-13aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...ERROR772
22023-09-13aHR0cHM6Ly9hcnQudHJpcHNob3QuY29tL3YxL2d0ZnMuem...ERROR1
32023-09-13aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...ERROR8
42023-09-13aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...ERROR3
...............
1652023-09-13aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...ERROR18
1662023-09-13aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS...ERROR8
1672023-09-13aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...ERROR112
1682023-09-13aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...ERROR22
1692023-09-13aHR0cHM6Ly93ZWJzZXJ2aWNlcy51bW9pcS5jb20vYXBpL2...ERROR1
\n", - "

170 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " date base64_url severity \\\n", - "0 2023-09-13 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... ERROR \n", - "1 2023-09-13 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... ERROR \n", - "2 2023-09-13 aHR0cHM6Ly9hcnQudHJpcHNob3QuY29tL3YxL2d0ZnMuem... ERROR \n", - "3 2023-09-13 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... ERROR \n", - "4 2023-09-13 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... ERROR \n", - ".. ... ... ... \n", - "165 2023-09-13 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... ERROR \n", - "166 2023-09-13 aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS... ERROR \n", - "167 2023-09-13 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... ERROR \n", - "168 2023-09-13 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... ERROR \n", - "169 2023-09-13 aHR0cHM6Ly93ZWJzZXJ2aWNlcy51bW9pcS5jb20vYXBpL2... ERROR \n", - "\n", - " total_notices \n", - "0 176 \n", - "1 772 \n", - "2 1 \n", - "3 8 \n", - "4 3 \n", - ".. ... \n", - "165 18 \n", - "166 8 \n", - "167 112 \n", - "168 22 \n", - "169 1 \n", - "\n", - "[170 rows x 4 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "validation_df" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "6428e781-d345-4379-990c-4cae27e6a9a2", - "metadata": {}, - "outputs": [], - "source": [ - "with_notices = feeds_on_target >> left_join(_, validation_df, on =['base64_url', 'date'])" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "8d888e61-816e-45a5-9365-d0fcff13ba1b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
keydatefeed_keyfeed_timezonebase64_urlgtfs_dataset_keygtfs_dataset_nametyperegional_feed_typenameseveritytotal_notices
229a35f0497a5339c81027045e76c3baadb2023-09-134fd4c630d0a1a588ef7934cc55c338bbAmerica/Los_AngelesaHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...dcb6fb26f2d08393f9b0b1c14dc8775cBay Area 511 Tri-Valley Wheels SchedulescheduleRegional SubfeedBay Area 511 Tri-Valley Wheels ScheduleERROR3888.0
257baba29dc5d70db76aa154d21034284bb2023-09-130acb41ac5426e39f694c65f839d32c9cAmerica/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...f91395e3131f298c6965235903e07b9bYARTS SchedulescheduleNoneYARTS ScheduleERROR3538.0
1993a16ab862ef89719a7da380069ca88672023-09-13de92bc3c6f71c498611c06e07c5a381fAmerica/Los_AngelesaHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...c5790c908973d410e5a5cf3ee121eccaMountain Transit SchedulescheduleNoneMountain Transit ScheduleERROR3372.0
213694480a725e046f95b8428f5297284092023-09-13fe662c95bddfb6e5fd75cb0afbb85cd8America/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...cde2b7a63ab7bb33141c8b02b001ea0fTCAT FlexscheduleNoneTCAT FlexERROR3105.0
214694480a725e046f95b8428f5297284092023-09-13fe662c95bddfb6e5fd75cb0afbb85cd8America/Los_AngelesaHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi...cde2b7a63ab7bb33141c8b02b001ea0fTCAT FlexscheduleNoneTCAT FlexERROR3099.0
\n", - "
" - ], - "text/plain": [ - " key date \\\n", - "229 a35f0497a5339c81027045e76c3baadb 2023-09-13 \n", - "257 baba29dc5d70db76aa154d21034284bb 2023-09-13 \n", - "199 3a16ab862ef89719a7da380069ca8867 2023-09-13 \n", - "213 694480a725e046f95b8428f529728409 2023-09-13 \n", - "214 694480a725e046f95b8428f529728409 2023-09-13 \n", - "\n", - " feed_key feed_timezone \\\n", - "229 4fd4c630d0a1a588ef7934cc55c338bb America/Los_Angeles \n", - "257 0acb41ac5426e39f694c65f839d32c9c America/Los_Angeles \n", - "199 de92bc3c6f71c498611c06e07c5a381f America/Los_Angeles \n", - "213 fe662c95bddfb6e5fd75cb0afbb85cd8 America/Los_Angeles \n", - "214 fe662c95bddfb6e5fd75cb0afbb85cd8 America/Los_Angeles \n", - "\n", - " base64_url \\\n", - "229 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... \n", - "257 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", - "199 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3... \n", - "213 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... \n", - "214 aHR0cHM6Ly9ndGZzLmNhbGl0cC5vcmcvcHJvZHVjdGlvbi... 
\n", - "\n", - " gtfs_dataset_key \\\n", - "229 dcb6fb26f2d08393f9b0b1c14dc8775c \n", - "257 f91395e3131f298c6965235903e07b9b \n", - "199 c5790c908973d410e5a5cf3ee121ecca \n", - "213 cde2b7a63ab7bb33141c8b02b001ea0f \n", - "214 cde2b7a63ab7bb33141c8b02b001ea0f \n", - "\n", - " gtfs_dataset_name type regional_feed_type \\\n", - "229 Bay Area 511 Tri-Valley Wheels Schedule schedule Regional Subfeed \n", - "257 YARTS Schedule schedule None \n", - "199 Mountain Transit Schedule schedule None \n", - "213 TCAT Flex schedule None \n", - "214 TCAT Flex schedule None \n", - "\n", - " name severity total_notices \n", - "229 Bay Area 511 Tri-Valley Wheels Schedule ERROR 3888.0 \n", - "257 YARTS Schedule ERROR 3538.0 \n", - "199 Mountain Transit Schedule ERROR 3372.0 \n", - "213 TCAT Flex ERROR 3105.0 \n", - "214 TCAT Flex ERROR 3099.0 " - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "with_notices >> arrange(-_.total_notices) >> head(5)" - ] - }, - { - "cell_type": "markdown", - "id": "6e9862a5-acc7-4d65-882c-530390726ec5", - "metadata": {}, - "source": [ - "## Download raw from GCS (for Conveyal)" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "2befb3c9-36ff-4128-8561-31f2d7aea1cd", - "metadata": {}, - "outputs": [], - "source": [ - "fs = get_fs()" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "id": "5b6d7cb9-8557-4f48-befb-8522efaea5df", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['norcal', 'central', 'socal', 'mojave'])" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "regions.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "id": "56df1e83-9887-4aaf-9fc3-4bf2ec794a0f", - "metadata": {}, - "outputs": [], - "source": [ - "def download_feed(row):\n", - " # need wildcard for file too -- not all are gtfs.zip!\n", - " uri = 
f'gs://calitp-gtfs-schedule-raw-v2/schedule/dt={row.date.strftime(\"%Y-%m-%d\")}/*/base64_url={row.base64_url}/*.zip'\n", - " fs.get(uri, f'{row.path}/{row.gtfs_dataset_name.replace(\" \", \"_\")}_{row.feed_key}_gtfs.zip')\n", - " # print(f'downloaded {row.path}/{row.feed_key}_gtfs.zip')" - ] - }, - { - "cell_type": "code", - "execution_count": 103, - "id": "c88007b8-d075-402d-80af-a317e18a112e", - "metadata": {}, - "outputs": [], - "source": [ - "def download_region(feeds_df, region: str):\n", - " \n", - " assert region in regions.keys()\n", - " path = f'./feeds_{feeds_df.date.iloc[0].strftime(\"%Y-%m-%d\")}/{region}'\n", - " if not os.path.exists(path): os.makedirs(path)\n", - " region = (feeds_df >> filter(_.region == region)).copy()\n", - " region['path'] = path\n", - " region.progress_apply(download_feed, axis = 1)" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "id": "492a57c5-d64f-4293-88fe-bd8febb3ef2d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
regionn
0central82
1mojave6
2norcal25
3socal84
\n", - "
" - ], - "text/plain": [ - " region n\n", - "0 central 82\n", - "1 mojave 6\n", - "2 norcal 25\n", - "3 socal 84" - ] - }, - "execution_count": 104, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "regions_and_feeds >> count(_.region)" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "id": "5fc9e426-e1ca-44fb-a0d3-107c145b0fec", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "446941c220614e509a402c2e3e4fbbb4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/4 [00:00> filter(_.region == region)).copy() + region['path'] = path + region.progress_apply(download_feed, axis = 1) + +def generate_script(regions): + # https://docs.conveyal.com/prepare-inputs#preparing-the-osm-data + cmds = [] + for region in regions.keys(): + cmd = f'''osmosis --read-pbf us-west-latest.osm.pbf --bounding-box left={regions[region]['west']} bottom={regions[region]['south']} right={regions[region]['east']} top={regions[region]['north']} --tf accept-ways highway=* public_transport=platform railway=platform park_ride=* --tf accept-relations type=restriction --used-node --write-pbf {region}-processed.pbf''' + cmds += [cmd] + with open('crop_filter_osm.sh', "w") as f: + f.write('#!/bin/bash\n') + f.write('wget http://download.geofabrik.de/north-america/us-west-latest.osm.pbf\n') + f.write('\n'.join(cmds)) + +if __name__ == '__main__': + + for region in tqdm(regions.keys()): + download_region(regions_and_feeds, region) + shutil.make_archive(f'feeds_{target_date}', 'zip', f'./feeds_{target_date}/') + fs.put(f'feeds_{target_date}.zip', f'{conveyal_vars.gcs_path}feeds_{target_date}.zip') + generate_script(regions) \ No newline at end of file diff --git a/conveyal_update/old_PrepGTFSFeeds.ipynb b/conveyal_update/old_PrepGTFSFeeds.ipynb deleted file mode 100644 index eac5b65ba..000000000 --- a/conveyal_update/old_PrepGTFSFeeds.ipynb +++ /dev/null @@ 
-1,265 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "2c980ecf-1262-4099-b4e2-f66622ae8f9b", - "metadata": {}, - "source": [ - "# Old script from Conveyal for reference!\n", - "\n", - "* aka the hard way" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af2089fa", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import pandas\n", - "import requests\n", - "import yaml\n", - "import math\n", - "from zipfile import ZipFile\n", - "\n", - "# Download composite GTFS files from data.ca.gov (note: schema has since changed)\n", - "prefix = 'https://urldefense.com/v3/__https://data.ca.gov/dataset/de6f1544-b162-4d16-997b-c183912c8e62/resource/'*5Cn__;JQ!!LWi6xHDyrA!74r966TgmHqnpM1IjGtbx-Gy1A31nIfAmlQa0JWgflqOPTrP4A4pPHr1ApHdB3iKkcnkBmZ11Aflg-QpRykoAbMhUw$ sourceFiles = {\n", - " 'agency': 'e8f9d49e-2bb6-400b-b01f-28bc2e0e7df2/download/agency.csv',\n", - " 'routes': 'c6bbb637-988f-431c-8444-aef7277297f8/download/routes.csv',\n", - " 'stops': '8c876204-e12b-48a2-8299-10f6ae3d4f2b/download/stops.csv',\n", - " 'stop_times': 'd31eef2f-e223-4ca4-a86b-170acc6b2590/download/stop_times.csv',\n", - " 'trips': '0e4da89e-9330-43f8-8de9-305cb7d4918f/download/trips.csv',\n", - " 'calendar': 'a79f10b8-b322-43f3-b3f4-ba46a8dbe9ab/download/calendar.csv',\n", - " 'calendar_dates': '06a21a8e-dba3-4e7e-8726-f2e992cc1a80/download/calendar_dates.csv',\n", - " 'feed_info': '50d12559-635e-4222-ac25-3706c066902d/download/feed_info.csv',\n", - " 'frequencies': '48542c8f-8ce1-43e3-a965-6c68771d6fe5/download/frequencies.csv',\n", - " 'shapes': '2f5e7bdb-33e8-4633-b163-6bab42ad0951/download/shapes.csv'\n", - "}\n", - "\n", - "# Formatting issue: data.ca.gov adds dashes to dates.\n", - "dateColumns = ['date', 'start_date', 'end_date', 'feed_start_date', 'feed_end_date']\n", - "intColumns = ['pickup_type', 'drop_off_type', 'timepoint', 'direction_id', 'location_type', 'wheelchair_accessible', 'bikes_allowed', 'exact_times']\n", - "\n", - 
"multiFeedAgencies = [182] # LA Metro\n", - "ignoredAgencies = [200] # MTC regional feed\n", - "\n", - "# TODO skip repeated URLs in agencies.yml file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1df7d6ba", - "metadata": {}, - "outputs": [], - "source": [ - "for fileName in sourceFiles:\n", - " fileUrl = prefix + sourceFiles[fileName]\n", - " fileLoc = fileName + '.txt'\n", - " file = requests.get(fileUrl)\n", - " open(fileLoc, 'wb').write(file.content)\n", - " print('Downloaded source ' + fileName + ' file.')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "299c3d61-baba-4630-b2e3-83136e250d7e", - "metadata": {}, - "outputs": [], - "source": [ - "# Set bounds for northern california\n", - "north = 42.03909\n", - "south = 39.07038\n", - "east = -119.60541\n", - "west = -124.49158\n", - "\n", - "# ... or set bounds for central california\n", - "north = 39.64165\n", - "south = 35.87347\n", - "east = -117.53174\n", - "west = -123.83789\n", - "\n", - "# ... or set bounds for southern california\n", - "north = 35.8935\n", - "south = 32.5005\n", - "east = -114.13121\n", - "west = -121.46759\n", - "\n", - "# ... 
or set bounds for mojave\n", - "north = 37.81629\n", - "south = 34.89945\n", - "east = -114.59015\n", - "west = -118.38043" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f6dc9d3", - "metadata": {}, - "outputs": [], - "source": [ - "stops = pandas.read_csv('stops.txt')\n", - "\n", - "keysToTrack = set()\n", - "\n", - "for index, stop in stops.iterrows():\n", - " if (stop.calitp_itp_id not in ignoredAgencies and (stop.calitp_url_number == 0 or stop.calitp_itp_id in multiFeedAgencies)):\n", - " if (stop.stop_lat > south and stop.stop_lat < north and stop.stop_lon > west and stop.stop_lon < east): \n", - " keysToTrack.add((stop.calitp_itp_id, stop.calitp_url_number))\n", - " \n", - "for key in keysToTrack:\n", - " stringKey = str(key[0]) + '_' + str(key[1])\n", - " if (not os.path.isdir('out/' + stringKey)):\n", - " os.mkdir('out/' + stringKey)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4bd9be97", - "metadata": {}, - "outputs": [], - "source": [ - "for file in sourceFiles:\n", - " print('Processing ' + file + ' file.')\n", - " df1 = pandas.read_csv(file + '.txt')\n", - " for key in keysToTrack:\n", - " outputFilename = 'out/'+ str(key[0]) + '_' + str(key[1]) + '/' + file + '.txt'\n", - " df = df1[(df1['calitp_itp_id'] == key[0]) & (df1['calitp_url_number'] == key[1])].copy()\n", - " # TODO filter out rows with missing id values (e.g. 
Tuolumne)\n", - " for col in dateColumns:\n", - " if(col in df.columns):\n", - " df[col] = df[col].str.replace('-','')\n", - " for col in intColumns:\n", - " if(col in df.columns):\n", - " df[col] = df[col].fillna(0).astype(int)\n", - " if (len(df) > 0):\n", - " df.to_csv(outputFilename, index=False) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9f9296a", - "metadata": {}, - "outputs": [], - "source": [ - "for key in keysToTrack:\n", - " stringKey = str(key[0]) + '_' + str(key[1])\n", - " with ZipFile('out/' + stringKey + '.zip', 'w') as zip:\n", - " for file in sourceFiles:\n", - " try:\n", - " zip.write('out/' + stringKey + '/' + file + '.txt', arcname=(file.split('/')[-1] + '.txt'))\n", - " except FileNotFoundError:\n", - " pass" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b0c8270e", - "metadata": {}, - "outputs": [], - "source": [ - "# Check for feeds that don't overlap the target date in calendar.txt or calendar_dates.txt\n", - "# Note that Conveyal bundle details show dates from feed_info, which may not reflect service accurately.\n", - "targetDate = '2022-11-30'\n", - "calitpAgences = {}\n", - "\n", - "with open('agencies.yml', 'r') as file:\n", - " agencies = yaml.safe_load(file)\n", - " \n", - "for agency in agencies:\n", - " calitpAgences[agencies[agency]['itp_id']] = agency\n", - "\n", - "calendar = pandas.read_csv('calendar.txt')\n", - "calendarDates = pandas.read_csv('calendar_dates.txt')\n", - "\n", - "dateRange = pandas.DataFrame(columns = ['key', 'c_start', 'c_end', 'cd_start', 'cd_end'])\n", - "dateRange.set_index('key', inplace=True)\n", - "\n", - "for index, row in calendar.iterrows():\n", - " if (row.calitp_itp_id not in ignoredAgencies and (row.calitp_url_number == 0 or row.calitp_itp_id in multiFeedAgencies)):\n", - " key = str(row.calitp_itp_id) + '_' + str(row.calitp_url_number)\n", - " if key in dateRange.index:\n", - " dateRange.at[key, 'c_start'] = min(dateRange.at[key, 'c_start'], 
row.start_date)\n", - " dateRange.at[key, 'c_end'] = max(dateRange.at[key, 'c_end'], row.end_date)\n", - " else:\n", - " dateRange.at[key, 'c_start'] = row.start_date\n", - " dateRange.at[key, 'c_end'] = row.end_date\n", - " dateRange.at[key, 'cd_start'] = '9999-99-99'\n", - " dateRange.at[key, 'cd_end'] = '0000-00-00'\n", - "\n", - "for index, row in calendarDates.iterrows():\n", - " if (row.calitp_itp_id not in ignoredAgencies and (row.calitp_url_number == 0 or row.calitp_itp_id in multiFeedAgencies)):\n", - " key = str(row.calitp_itp_id) + '_' + str(row.calitp_url_number)\n", - " if key in dateRange.index:\n", - " dateRange.at[key, 'cd_start'] = min(dateRange.at[key, 'cd_start'], row.date)\n", - " dateRange.at[key, 'cd_end'] = max(dateRange.at[key, 'cd_end'], row.date)\n", - " else:\n", - " dateRange.at[key, 'cd_start'] = '9999-99-99'\n", - " dateRange.at[key, 'cd_end'] = '0000-00-00'\n", - "\n", - "# log dates to manually check for overlap\n", - "for index, row in dateRange.iterrows():\n", - " if row.c_start > targetDate or row.c_end < targetDate:\n", - " agency = calitpAgences[int(index.split('_')[0])]\n", - " print(agency + ', ' + index + ', ' + row.c_start + ', ' + row.c_end + ', ' + row.cd_start + ', ' + row.cd_end)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06952d70", - "metadata": {}, - "outputs": [], - "source": [ - "# Scratch: read agency data\n", - "\n", - "# Prereq: ensure https://urldefense.com/v3/__https://github.com/cal-itp/data-infra/blob/main/airflow/data/agencies.yml__;!!LWi6xHDyrA!74r966TgmHqnpM1IjGtbx-Gy1A31nIfAmlQa0JWgflqOPTrP4A4pPHr1ApHdB3iKkcnkBmZ11Aflg-QpRykku6YuzQ$ has been downloaded\n", - "\n", - "# Load Cal-ITP agency id/name/URL info\n", - "# Note most agencies only have one URL, but some have multiple (e.g. 
LA Metro has separate URLs for bus and rail)\n", - "# So id and url number form a compound unique key\n", - "with open('agencies.yml', 'r') as file:\n", - " agencies = yaml.safe_load(file)\n", - " \n", - "for agency in agencies:\n", - " for urlNum in range(0, len(agencies[agency]['feeds'])):\n", - " os.mkdir('out/' + agency.replace('/','_') + '_' + str(urlNum))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}