# FinalprojectinvolvingUSwaterquality.py
# (Web-page residue from the GitHub file view -- page chrome and the line-number
# gutter -- removed here; the script itself starts at the first cell below.)
#%%
import pandas as pd
import numpy as np
import polars as pl
import folium
#make sure to install ipyleaflet
#https://catalog.data.gov/dataset/water-quality-data-0de37
# Show every column when a frame is displayed.  The pandas option is kept for
# any pandas objects, but it has no effect on the Polars frames loaded below,
# so the equivalent Polars display setting is applied as well (-1 = all columns).
pd.set_option('display.max_columns', None)
pl.Config.set_tbl_cols(-1)

# Folder that holds the four CSV files read below.  Defined once so that
# relocating the data means editing a single line instead of four.
DATA_DIR = r"C:\Users\amcfa\gitfiles\Projects\MastersWork\FundamentalssofDataVisualizzations\Water quality data"

# Load each CSV into its own Polars DataFrame; low_memory=False is passed
# exactly as before.
field_results = pl.read_csv(DATA_DIR + r"\field_results.csv", low_memory=False)
period_of_record = pl.read_csv(DATA_DIR + r"\period_of_record.csv", low_memory=False)
stations = pl.read_csv(DATA_DIR + r"\stations.csv", low_memory=False)
lab_results = pl.read_csv(DATA_DIR + r"\lab_results.csv", low_memory=False)
# %%
# Inspection cells: each one ends with a single bare expression so the
# interactive window renders that frame.  A cell listing several bare
# expressions only renders the last one, so every frame gets its own cell.
field_results
# %%
period_of_record
# %%
lab_results
# %%
stations
# %%
# Random sample of ten rows from the stations frame.
stations.sample(n=10)
# %%
# Summary statistics for the stations frame.
stations.describe()
# %%
## Results grouped by area sounds like a good place to start -- maybe result limits, lat/long.
# Each cell below narrows one source frame down to the listed columns.
a = lab_results.select([
    'latitude',
    'longitude',
    'station_number',
    'result',
    'reporting_limit',
    'units',
])
# %%
b = stations.select([
    'latitude',
    'longitude',
    'station_number',
])
# %%
c = field_results.select([
    'latitude',
    'longitude',
    'full_station_name',
    'parameter',
    'fdr_result',
    'fdr_reporting_limit',
    'uns_name',
])
# %%
d = period_of_record.select([
    'latitude',
    'longitude',
    'sample_date_max',
    'sample_date_min',
])
# %%
# Display each narrowed frame in its own cell.
a
# %%
b
# %%
c
# %%
d
# %%
# Create a folium map centred on a fixed coordinate pair (no layers or markers
# are added here) and leave `m` as the cell's last expression so it is rendered.
m = folium.Map(
    location=[39.272938, -121.16],
)
m
# %%