-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmap_echo.py
74 lines (57 loc) · 2.06 KB
/
map_echo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#%%
import os
import pandas as pd
import geopandas as gpd
import match.helpers as helpers
from dotenv import load_dotenv
# Populate the process environment from a .env file (python-dotenv) before
# the os.environ lookups below.
load_dotenv()
# Directory holding the staged input files; "echo.csv" is read from here.
# A missing variable raises KeyError as soon as the script starts.
DATA_PATH = os.environ["WSB_STAGING_PATH"]
# NOTE(review): presumably the project's target EPSG code. It is read here but
# not referenced anywhere else in the visible script - confirm it is needed.
EPSG = os.environ["WSB_EPSG"]
#%%
# Columns pulled from the ECHO export; any other column in the file is skipped.
usecols = [
    "pwsid",
    "fac_lat",
    "fac_long",
    "fac_name",
    "fac_street",
    "fac_city",
    "fac_state",
    "fac_zip",
    "fac_county",
    "fac_collection_method",
    "fac_reference_point",
    "fac_accuracy_meters",
    "fac_indian_cntry_flg",
    "fac_percent_minority",
    "fac_pop_den",
    "ejscreen_flag_us",
]

# Read the selected columns as strings (no numeric type inference).
echo_csv_path = os.path.join(DATA_PATH, "echo.csv")
echo_df = pd.read_csv(echo_csv_path, usecols=usecols, dtype="str")
#%%
pwsids = helpers.get_pwsids_of_interest()

# Cleanse out "UNK" first, so unknown values count as missing for both the
# coordinate filter and the fac_state backfill below. (Doing this after the
# backfill left "UNK" states as NA instead of filling them from the pwsid.)
echo_df = echo_df.replace({"UNK": pd.NA})

# Filter to only those in our SDWIS list and with BOTH lat and long; a row
# missing either coordinate cannot be turned into a valid point.
# Counts noted when this was written: 47,951 SDWIS match to ECHO, 1494 don't match
echo_df = echo_df.loc[
    echo_df["pwsid"].isin(pwsids)
    & echo_df["fac_lat"].notna()
    & echo_df["fac_long"].notna()
].copy()

# If fac_state is NA, copy from pwsid (its first two characters)
mask = echo_df["fac_state"].isna()
echo_df.loc[mask, "fac_state"] = echo_df.loc[mask, "pwsid"].str[0:2]

# Convert to geopandas: x = longitude, y = latitude, in WGS84 (EPSG:4326)
echo: gpd.GeoDataFrame = gpd.GeoDataFrame(
    echo_df,
    geometry=gpd.points_from_xy(echo_df["fac_long"], echo_df["fac_lat"]),
    crs="EPSG:4326")

# Notebook-style preview of the first rows
echo.head()
#%%
# Map the ECHO columns onto the output schema. The mapping is an ordered dict
# so that a Series is assigned first (establishing the row index) before the
# scalar "source_system" value is broadcast across it.
echo_pwsid = echo["pwsid"]

unified_columns = {
    "source_system_id": echo_pwsid,
    "source_system": "echo",
    "contributor_id": "echo." + echo_pwsid,
    "master_key": echo_pwsid,
    "pwsid": echo_pwsid,
    "state": echo["fac_state"],
    "name": echo["fac_name"],
    "address_line_1": echo["fac_street"],
    "city": echo["fac_city"],
    "county": echo["fac_county"],
    "zip": echo["fac_zip"],
    # First two characters of the pwsid
    "primacy_agency_code": echo_pwsid.str[0:2],
    "centroid_lat": echo["fac_lat"],
    "centroid_lon": echo["fac_long"],
    "geometry": echo["geometry"],
    "centroid_quality": echo["fac_collection_method"],
}

df = gpd.GeoDataFrame().assign(**unified_columns)
#%%
# Hand the mapped frame to the project helper under the name "echo".
# NOTE(review): presumably this writes `df` to a PostGIS table; the helper
# lives in match.helpers and is not visible here - confirm its behavior
# (e.g. whether it reprojects the EPSG:4326 geometry).
helpers.load_to_postgis("echo", df)