-
Notifications
You must be signed in to change notification settings - Fork 0
/
2023-12-01-list-admins.py
138 lines (121 loc) · 4.2 KB
/
2023-12-01-list-admins.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python3
import pandas as pd
import subprocess
import os
from collections import defaultdict
# Hand-maintained fallback table: origin city name (exactly as it appears in
# the arrivals "Origin" column) -> administrative area to count it under.
# It is consulted only for origins that have no matching `city_ascii` entry
# in worldcities.csv; origins found in neither place are counted as "Unknown".
cities_not_in_worldcities = {
    "Provincetown": "Massachusetts",
    "Bar Harbor": "Maine",
    "Raleigh/Durham": "North Carolina",
    "Martha's Vineyard": "Massachusetts",
    "Bedford/Hanscom": "Massachusetts",
    "Saranac Lake": "New York",
    "Dulles": "Virginia",
    "Westchester County": "New York",
    "Hyannis": "Massachusetts",
    "Saint Louis": "Missouri",
    "Farmingdale": "New York",
    "Belmar": "New Jersey",
    "Pellston": "Michigan",
    "Greensboro/High Point": "North Carolina",
    "Latrobe": "Pennsylvania",
    "Teterboro": "New Jersey",
    "Stowe": "Vermont",
    "Westhampton Beach": "New York",
    "Page": "Arizona",
    "Selinsgrove": "Pennsylvania",
    "Hilton Head": "South Carolina",
    "Eastport": "Maine",
    "Salisbury-Ocean City": "Maryland",
    "Dillon": "Montana",
    "Placida": "Florida",
    "Laporte": "Indiana",
    "Saint Paul": "Minnesota",
    "Farmville": "Virginia",
    "Saint Augustine": "Florida",
    "Mt Vernon": "Illinois",
    "Fishers Island": "New York",
    "Aspen": "Colorado",
    "Ocean Reef": "Florida",
    "Montauk Point": "New York",
    "Wiscasset": "Maine",
    "Port Clinton": "Ohio",
    "Manteo": "North Carolina",
    "Islesboro": "Maine",
    "Houlton": "Maine",
    "Currituck": "North Carolina",
    "Lake Placid": "New York",
    "Block Island": "Rhode Island",
    "Rangeley": "Maine",
    "Reedsville": "Pennsylvania",
    "Kailua-Kona": "Hawaii",
    "Edenton": "North Carolina",
    "Millinocket": "Maine",
    "Winnsboro": "Louisiana",
    "Great Barrington": "Massachusetts",
    "Blue Bell": "Pennsylvania",
    "Kayenta": "Arizona",
    "Bristol, VA/Johnson City/Kingsport": "Virginia",
    "Mount Pocono": "Pennsylvania",
    "Waller County": "Texas",
    "Thomson": "Georgia",
    "Saint Thomas": "Virgin Islands",
    "Lorain/Elyria": "Ohio",
}
def get_arrivals(day, month, year=2023):
    """Return the local path of the BOS arrivals CSV for one date.

    The file is cached under ``flight_data/``. If it is not already there,
    it is copied from the ``nao-bostraffic`` S3 bucket with the ``aws`` CLI.

    Args:
        day: Day of the month (integer, zero-padded to two digits).
        month: Month number (integer, zero-padded to two digits).
        year: Four-digit year; defaults to 2023 so existing callers are
            unaffected.

    Returns:
        The relative path ``flight_data/<year>-<month>-<day>.csv``.

    Raises:
        subprocess.CalledProcessError: If ``aws s3 cp`` exits non-zero
            (for example, no object exists for that date).
        OSError: If the ``aws`` executable cannot be started.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs("flight_data", exist_ok=True)

    date_stamp = f"{year:04d}-{month:02d}-{day:02d}"
    flight_data_path = f"flight_data/{date_stamp}.csv"

    # Only hit S3 when the file has not been downloaded before.
    if not os.path.exists(flight_data_path):
        subprocess.check_call(
            [
                "aws",
                "s3",
                "cp",
                f"s3://nao-bostraffic/Data/Arrivals/{date_stamp}_BOS_Arrivals.csv",
                flight_data_path,
            ]
        )
    return flight_data_path
def extract_flight_origin_data():
    """Tally BOS arrivals by the administrative area of their origin city.

    Reads ``worldcities.csv`` from the working directory, then walks every
    (month, day) combination for months 4 through 12 and days 1 through 31.
    Each origin is resolved to an administrative area in this order:

    1. the ``admin_name`` of the first ``worldcities.csv`` row whose
       ``city_ascii`` equals the origin;
    2. the hand-maintained ``cities_not_in_worldcities`` table;
    3. the literal ``"Unknown"``; the origin is then also tallied
       separately so it can be added to the table later.

    Dates whose arrivals file cannot be fetched (including impossible dates
    such as the 31st of a 30-day month) are reported and skipped.

    Writes two tab-separated files to the working directory:
    ``unassigned_cities.tsv`` (unresolved origin -> count) and
    ``total_origin_counts.tsv`` (administrative area -> count).
    """
    # https://simplemaps.com/static/data/world-cities/basic/simplemaps_worldcities_basicv1.76.zip
    cities_data = pd.read_csv("worldcities.csv")

    # Build the city -> admin lookup once instead of scanning the whole
    # DataFrame for every flight. keep="first" (the default) matches the
    # original behaviour of taking the first matching row. Rows with a
    # missing city name are dropped so a missing origin can never match one.
    admin_by_city = (
        cities_data.dropna(subset=["city_ascii"])
        .drop_duplicates(subset="city_ascii")
        .set_index("city_ascii")["admin_name"]
        .to_dict()
    )

    unknown_counts = defaultdict(int)
    total_origin_counts = defaultdict(int)

    month_range = range(4, 13)
    day_range = range(1, 32)

    for month in month_range:
        for day in day_range:
            try:
                flight_data_path = get_arrivals(day, month)
            except (subprocess.CalledProcessError, OSError):
                # Best effort: a failed download (missing object, invalid
                # date, aws CLI unavailable) skips the day. Catching only
                # these lets Ctrl-C and genuine bugs propagate.
                print(f"No data for {month}-{day}")
                continue

            flight_data = pd.read_csv(flight_data_path)
            for origin_city in flight_data["Origin"].values:
                if origin_city in admin_by_city:
                    administrative_area = admin_by_city[origin_city]
                elif origin_city in cities_not_in_worldcities:
                    administrative_area = cities_not_in_worldcities[origin_city]
                else:
                    administrative_area = "Unknown"
                    unknown_counts[origin_city] += 1
                total_origin_counts[administrative_area] += 1

    with open("unassigned_cities.tsv", "w") as f:
        for k, v in unknown_counts.items():
            f.write(f"{k}\t{v}\n")

    with open("total_origin_counts.tsv", "w") as f:
        for k, v in total_origin_counts.items():
            f.write(f"{k}\t{v}\n")
def start():
    """Script entry point: run the flight-origin extraction."""
    extract_flight_origin_data()
# Run the extraction only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    start()