-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmap_labeled.py
73 lines (55 loc) · 1.97 KB
/
map_labeled.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#%%
import os
import pandas as pd
import geopandas as gpd
from dotenv import load_dotenv
import match.helpers as helpers
# load_dotenv() comes from python-dotenv; it is called here so that values
# from a .env file (if one is found) are available through os.environ below.
load_dotenv()
# Do not truncate the column list when a frame is printed.
pd.set_option("display.max_columns", None)
# Both variables are required: a missing one raises KeyError right here.
DATA_PATH = os.environ["WSB_STAGING_PATH"]
EPSG = os.environ["WSB_EPSG"]
#%%
# Read the cleaned, labeled boundary layer from the staging directory.
labeled_path = os.path.join(DATA_PATH, "wsb_labeled_clean.gpkg")
labeled = gpd.read_file(labeled_path)
print("Read Labeled WSB file.")
# PWSIDs supplied by the project helper; used to filter the labeled rows.
pwsids = helpers.get_pwsids_of_interest()
print("Retrieved PWSID's of interest.")
#%%
# Filter to those in SDWIS: keep only rows whose pwsid appears in `pwsids`.
# The explicit .copy() makes the filtered frame independent of the frame it
# was selected from, so the in-place .loc writes below are unambiguous and
# pandas has no reason to raise SettingWithCopyWarning.
labeled = labeled[labeled["pwsid"].isin(pwsids)].copy()
#%%
# Null out a few bad lat/long.
# A row is flagged when its latitude is outside [-90, 90] OR its longitude is
# outside [-180, 180]; both coordinates of a flagged row are cleared together.
# Rows whose coordinates are already missing compare False and are not counted.
mask = (
    (labeled["centroid_lat"] < -90) | (labeled["centroid_lat"] > 90) |
    (labeled["centroid_long"] < -180) | (labeled["centroid_long"] > 180))
labeled.loc[mask, "centroid_lat"] = pd.NA
labeled.loc[mask, "centroid_long"] = pd.NA
print(f"Nulled out {mask.sum()} bad lat/long.")
#%%
# Assemble the output table for the "labeled" source. Every keyword passed to
# assign() becomes one column, in the order written; string constants are
# repeated on every row.
df = gpd.GeoDataFrame().assign(
    # The PWSID is reused as the source id, the master key and the pwsid.
    source_system_id=labeled["pwsid"],
    source_system="labeled",
    contributor_id="labeled." + labeled["pwsid"],
    master_key=labeled["pwsid"],
    pwsid=labeled["pwsid"],
    state=labeled["state"],
    # Agency code is the first two characters of the PWSID.
    primacy_agency_code=labeled["pwsid"].str[0:2],
    name=labeled["pws_name"],
    # Not populated for this source (mapping kept for reference):
    # address_line_1 = labeled["location_address"],
    city=labeled["city"],
    # Not populated for this source (mapping kept for reference):
    # zip = labeled["postal_code"],
    county=labeled["county"],
    # Need to convert these to EPSG:4326 before we can save them
    # NOTE(review): no reprojection happens in this script and EPSG is read
    # from the environment but not used here -- confirm where it is handled.
    centroid_lat=labeled["centroid_lat"],
    centroid_lon=labeled["centroid_long"],
    centroid_quality="CALCULATED FROM GEOMETRY",
    geometry=labeled["geometry"],
    geometry_source_detail=labeled["geometry_source_detail"],
)
#%%
# Report how many assembled rows fall under each primacy agency code,
# listed in code order.
print("Labeled record counts:")
counts_by_agency = df.groupby("primacy_agency_code").size()
print(counts_by_agency.sort_index())
# %%
# Hand the assembled frame to the project helper under the name "labeled".
# NOTE(review): presumably this writes the table to PostGIS -- see
# match.helpers.load_to_postgis for the actual behavior.
helpers.load_to_postgis("labeled", df)