From 6c5cb916dca3f8b644bcbd5d5ce2e75657d6d143 Mon Sep 17 00:00:00 2001 From: ren Date: Thu, 1 Oct 2020 18:40:42 -0400 Subject: [PATCH 1/2] Added code so that this analysis will mirror the work done by Hamza so the PowerPoint will be more concise --- notebooks/2.4-je-temperature-summary.ipynb | 4445 +++++++++++++++++++- 1 file changed, 4220 insertions(+), 225 deletions(-) diff --git a/notebooks/2.4-je-temperature-summary.ipynb b/notebooks/2.4-je-temperature-summary.ipynb index 9bda510..20c5f5e 100644 --- a/notebooks/2.4-je-temperature-summary.ipynb +++ b/notebooks/2.4-je-temperature-summary.ipynb @@ -4,7 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# This is The Title of Notebook\n", + "# Temperature Analysis \n", + "\n", "### Purpose\n", "This notebook will look at comparing the usability of temperature readings between Christmas Bird Count Volunteers and NOAA Weather Stations.\n", "\n", @@ -58,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 227, "metadata": {}, "outputs": [], "source": [ @@ -69,6 +70,8 @@ "from sklearn.metrics.pairwise import haversine_distances\n", "from sklearn.neighbors import DistanceMetric\n", "import plotly.graph_objects as go\n", + "from statistics import mode\n", + "import scipy\n", "\n", "#Options\n", "pd.set_option(\"display.max_columns\", 100)" @@ -90,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 228, "metadata": {}, "outputs": [], "source": [ @@ -122,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 229, "metadata": {}, "outputs": [], "source": [ @@ -132,14 +135,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 230, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\jacob\\anaconda3\\envs\\cmmdsjob\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3146: DtypeWarning: Columns (62,64,65,71,73,75,77) have mixed types.Specify dtype option on 
import or set low_memory=False.\n", + "/Users/rcdebaca/.pyenv/versions/funhacks371/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3063: DtypeWarning: Columns (62,64,65,71,73,75,77) have mixed types.Specify dtype option on import or set low_memory=False.\n", " interactivity=interactivity, compiler=compiler, result=result)\n" ] } @@ -150,7 +153,660 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 231, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
circle_namecountry_statelatloncount_yearcount_daten_field_countersn_feeder_countersmin_field_partiesmax_field_partiesfield_hoursfeeder_hoursnocturnal_hoursfield_distancenocturnal_distancedistance_unitsmin_tempmax_temptemp_unitmin_windmax_windwind_unitmin_snowmax_snowsnow_unitam_cloudpm_cloudfield_distance_imperialfield_distance_metricnocturnal_distance_imperialnocturnal_distance_metricmin_snow_imperialmin_snow_metricmax_snow_metricmax_snow_imperialmin_temp_imperialmax_temp_imperialmin_temp_metricmax_temp_metricmin_wind_metricmax_wind_metricmin_wind_imperialmax_wind_imperialuigeohash_circlecircle_ididlatitudelongitudeelevationstatenamegsn_flaghcn_crn_flagwmoidgeohash_stationtemp_min_valuetemp_max_valueprecipitation_valuetemp_avgsnowsnwdam_rainpm_rainam_snowpm_snowcircle_elevelevation_sourceblock_fipscounty_fipsEcosys_circleUsgsid_sys_circleNlcd_code_circleNlcd_circleEcosys_stationUsgsid_sys_stationNlcd_code_stationNlcd_station
756373SanningaruqUS-AK67.0833-162.966720042004-01-052.00.01.02.06.250.01.026.010.0Miles0.05.02.00.017.01.012.030.02.01.01.026.041.84100410.016.09269412.00000030.4876.2030.0000000.05.0-17.777778-15.0000000.00000027.3575800.000017.00067.0833-162.9667_2004b7ujb7ujes5USR0000AMTN67.1414-162.9944246.6AKMT. NOAK ALASKANaNb7ujNaNNaNNaNNaNNaNNaN333322.33ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
756374SanningaruqUS-AK67.1000-162.833319851985-01-016.0NaNNaNNaNNaN6.0NaNNaNNaNMiles25.032.0NaN10.025.0NaN6.036.0NaN6.06.0NaNNaNNaNNaN2.36220515.2491.4414.173228102.6115.2-3.8888890.00000016.09269440.2317356.214015.53567.1-162.8333_1985b7ujb7ujwxhUSR0000AMTN67.1414-162.9944246.6AKMT. NOAK ALASKANaNb7ujNaNNaNNaNNaNNaNNaN332242.54ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
756375SanningaruqUS-AK67.1000-162.833319861985-12-213.0NaNNaNNaNNaNNaNNaNNaNNaNMiles20.025.0NaN12.015.0NaN1.018.0NaN2.06.0NaNNaNNaNNaN0.3937012.5445.727.08661493.6102.6-6.666667-3.88888919.31123324.1390417.45689.32167.1-162.8333_1986b7ujb7ujwxhUSR0000AMTN67.1414-162.9944246.6AKMT. NOAK ALASKANaNb7ujNaNNaNNaNNaNNaNNaN332242.54ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
756376SanningaruqUS-AK67.1000-162.833319871986-12-187.0NaNNaNNaNNaNNaNNaNNaNNaNMiles28.033.0NaN10.010.0NaN18.030.0NaN7.07.0NaNNaNNaNNaN7.08661445.7276.2011.811024108.0117.0-2.2222220.55555616.09269416.0926946.21406.21467.1-162.8333_1987b7ujb7ujwxhUSR0000AMTN67.1414-162.9944246.6AKMT. NOAK ALASKANaNb7ujNaNNaNNaNNaNNaNNaN331142.54ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
756377Point BarrowUS-AK71.3333-156.666719761975-12-289.0NaNNaNNaNNaNNaNNaNNaNNaNMilesNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN71.3333-156.6667_1976bs8tbs8tfcyUSW0002750271.2833-156.78149.4AKBARROW POST ROGERS APGSN70026.0bs8t-267.0-206.03.0NaN3.0102.0NaNNaNNaNNaN0.31ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " circle_name country_state lat lon count_year count_date \\\n", + "756373 Sanningaruq US-AK 67.0833 -162.9667 2004 2004-01-05 \n", + "756374 Sanningaruq US-AK 67.1000 -162.8333 1985 1985-01-01 \n", + "756375 Sanningaruq US-AK 67.1000 -162.8333 1986 1985-12-21 \n", + "756376 Sanningaruq US-AK 67.1000 -162.8333 1987 1986-12-18 \n", + "756377 Point Barrow US-AK 71.3333 -156.6667 1976 1975-12-28 \n", + "\n", + " n_field_counters n_feeder_counters min_field_parties \\\n", + "756373 2.0 0.0 1.0 \n", + "756374 6.0 NaN NaN \n", + "756375 3.0 NaN NaN \n", + "756376 7.0 NaN NaN \n", + "756377 9.0 NaN NaN \n", + "\n", + " max_field_parties field_hours feeder_hours nocturnal_hours \\\n", + "756373 2.0 6.25 0.0 1.0 \n", + "756374 NaN NaN 6.0 NaN \n", + "756375 NaN NaN NaN NaN \n", + "756376 NaN NaN NaN NaN \n", + "756377 NaN NaN NaN NaN \n", + "\n", + " field_distance nocturnal_distance distance_units min_temp max_temp \\\n", + "756373 26.0 10.0 Miles 0.0 5.0 \n", + "756374 NaN NaN Miles 25.0 32.0 \n", + "756375 NaN NaN Miles 20.0 25.0 \n", + "756376 NaN NaN Miles 28.0 33.0 \n", + "756377 NaN NaN Miles NaN NaN \n", + "\n", + " temp_unit min_wind max_wind wind_unit min_snow max_snow \\\n", + "756373 2.0 0.0 17.0 1.0 12.0 30.0 \n", + "756374 NaN 10.0 25.0 NaN 6.0 36.0 \n", + "756375 NaN 12.0 15.0 NaN 1.0 18.0 \n", + "756376 NaN 10.0 10.0 NaN 18.0 30.0 \n", + "756377 NaN NaN NaN NaN NaN NaN \n", + "\n", + " snow_unit am_cloud pm_cloud field_distance_imperial \\\n", + "756373 2.0 1.0 1.0 26.0 \n", + "756374 NaN 6.0 6.0 NaN \n", + "756375 NaN 2.0 6.0 NaN \n", + "756376 NaN 7.0 7.0 NaN \n", + "756377 NaN NaN NaN NaN \n", + "\n", + " field_distance_metric nocturnal_distance_imperial \\\n", + "756373 41.841004 10.0 \n", + "756374 NaN NaN \n", + "756375 NaN NaN \n", + "756376 NaN NaN \n", + "756377 NaN NaN \n", + "\n", + " nocturnal_distance_metric min_snow_imperial min_snow_metric \\\n", + "756373 16.092694 12.000000 30.48 \n", + "756374 NaN 2.362205 15.24 
\n", + "756375 NaN 0.393701 2.54 \n", + "756376 NaN 7.086614 45.72 \n", + "756377 NaN NaN NaN \n", + "\n", + " max_snow_metric max_snow_imperial min_temp_imperial \\\n", + "756373 76.20 30.000000 0.0 \n", + "756374 91.44 14.173228 102.6 \n", + "756375 45.72 7.086614 93.6 \n", + "756376 76.20 11.811024 108.0 \n", + "756377 NaN NaN NaN \n", + "\n", + " max_temp_imperial min_temp_metric max_temp_metric min_wind_metric \\\n", + "756373 5.0 -17.777778 -15.000000 0.000000 \n", + "756374 115.2 -3.888889 0.000000 16.092694 \n", + "756375 102.6 -6.666667 -3.888889 19.311233 \n", + "756376 117.0 -2.222222 0.555556 16.092694 \n", + "756377 NaN NaN NaN NaN \n", + "\n", + " max_wind_metric min_wind_imperial max_wind_imperial \\\n", + "756373 27.357580 0.0000 17.000 \n", + "756374 40.231735 6.2140 15.535 \n", + "756375 24.139041 7.4568 9.321 \n", + "756376 16.092694 6.2140 6.214 \n", + "756377 NaN NaN NaN \n", + "\n", + " ui geohash_circle circle_id id latitude \\\n", + "756373 67.0833-162.9667_2004 b7uj b7ujes5 USR0000AMTN 67.1414 \n", + "756374 67.1-162.8333_1985 b7uj b7ujwxh USR0000AMTN 67.1414 \n", + "756375 67.1-162.8333_1986 b7uj b7ujwxh USR0000AMTN 67.1414 \n", + "756376 67.1-162.8333_1987 b7uj b7ujwxh USR0000AMTN 67.1414 \n", + "756377 71.3333-156.6667_1976 bs8t bs8tfcy USW00027502 71.2833 \n", + "\n", + " longitude elevation state name gsn_flag \\\n", + "756373 -162.9944 246.6 AK MT. NOAK ALASKA \n", + "756374 -162.9944 246.6 AK MT. NOAK ALASKA \n", + "756375 -162.9944 246.6 AK MT. NOAK ALASKA \n", + "756376 -162.9944 246.6 AK MT. 
NOAK ALASKA \n", + "756377 -156.7814 9.4 AK BARROW POST ROGERS AP GSN \n", + "\n", + " hcn_crn_flag wmoid geohash_station temp_min_value temp_max_value \\\n", + "756373 NaN b7uj NaN NaN \n", + "756374 NaN b7uj NaN NaN \n", + "756375 NaN b7uj NaN NaN \n", + "756376 NaN b7uj NaN NaN \n", + "756377 70026.0 bs8t -267.0 -206.0 \n", + "\n", + " precipitation_value temp_avg snow snwd am_rain pm_rain am_snow \\\n", + "756373 NaN NaN NaN NaN 3 3 3 \n", + "756374 NaN NaN NaN NaN 3 3 2 \n", + "756375 NaN NaN NaN NaN 3 3 2 \n", + "756376 NaN NaN NaN NaN 3 3 1 \n", + "756377 3.0 NaN 3.0 102.0 NaN NaN NaN \n", + "\n", + " pm_snow circle_elev elevation_source block_fips county_fips \\\n", + "756373 3 22.33 ghcn_d NaN NaN \n", + "756374 2 42.54 ghcn_d NaN NaN \n", + "756375 2 42.54 ghcn_d NaN NaN \n", + "756376 1 42.54 ghcn_d NaN NaN \n", + "756377 NaN 0.31 ghcn_d NaN NaN \n", + "\n", + " Ecosys_circle Usgsid_sys_circle Nlcd_code_circle Nlcd_circle \\\n", + "756373 NaN NaN NaN NaN \n", + "756374 NaN NaN NaN NaN \n", + "756375 NaN NaN NaN NaN \n", + "756376 NaN NaN NaN NaN \n", + "756377 NaN NaN NaN NaN \n", + "\n", + " Ecosys_station Usgsid_sys_station Nlcd_code_station Nlcd_station \n", + "756373 NaN NaN NaN NaN \n", + "756374 NaN NaN NaN NaN \n", + "756375 NaN NaN NaN NaN \n", + "756376 NaN NaN NaN NaN \n", + "756377 NaN NaN NaN NaN " + ] + }, + "execution_count": 231, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 232, "metadata": {}, "outputs": [ { @@ -174,10 +830,14 @@ " \n", " \n", " \n", + " ui\n", " count_year\n", + " count_date\n", " circle_name\n", " circle_id\n", + " Ecosys_circle\n", " specific_circle_ecosystem\n", + " Nlcd_code_circle\n", " macro_circle_ecosystem\n", " circle_elevation\n", " circle_lat\n", @@ -185,7 +845,9 @@ " circle_min_temp\n", " circle_max_temp\n", " noaa_id\n", + " Ecosys_station\n", " specific_station_ecosystem\n", + " Nlcd_code_station\n", " 
macro_station_ecosystem\n", " noaa_elevation\n", " noaa_lat\n", @@ -197,11 +859,15 @@ " \n", " \n", " 0\n", + " 19.4333-155.2833_1955\n", " 1955\n", + " 1955-01-01\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", @@ -210,6 +876,8 @@ " USC00511303\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1210.40\n", " 19.4297\n", " -155.2561\n", @@ -218,11 +886,15 @@ " \n", " \n", " 1\n", + " 19.4333-155.2833_1956\n", " 1956\n", + " 1955-12-31\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", @@ -231,6 +903,8 @@ " USC00511303\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1210.40\n", " 19.4297\n", " -155.2561\n", @@ -239,19 +913,25 @@ " \n", " \n", " 2\n", + " 19.4333-155.2833_1968\n", " 1968\n", + " 1967-12-30\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", - " 54.0\n", - " 66.0\n", + " 12.222222\n", + " 18.888889\n", " US1HIHI0013\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1059.20\n", " 19.4391\n", " -155.2156\n", @@ -260,19 +940,25 @@ " \n", " \n", " 3\n", + " 19.4333-155.2833_1968\n", " 1968\n", + " 1967-12-30\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", - " 54.0\n", - " 66.0\n", + " 12.222222\n", + " 18.888889\n", " US1HIHI0071\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1194.80\n", " 19.4414\n", " -155.2487\n", @@ -281,19 +967,25 @@ " \n", " \n", " 4\n", + " 19.4333-155.2833_1968\n", " 1968\n", + " 1967-12-30\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", - " 54.0\n", - " 66.0\n", + " 12.222222\n", + " 18.888889\n", " USC00514563\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1079.87\n", " 19.4094\n", " -155.2608\n", @@ 
-305,12 +997,19 @@ "" ], "text/plain": [ - " count_year circle_name circle_id specific_circle_ecosystem \\\n", - "0 1955 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", - "1 1956 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", - "2 1968 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", - "3 1968 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", - "4 1968 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", + " ui count_year count_date circle_name \\\n", + "0 19.4333-155.2833_1955 1955 1955-01-01 Hawai'i: Volcano N.P. \n", + "1 19.4333-155.2833_1956 1956 1955-12-31 Hawai'i: Volcano N.P. \n", + "2 19.4333-155.2833_1968 1968 1967-12-30 Hawai'i: Volcano N.P. \n", + "3 19.4333-155.2833_1968 1968 1967-12-30 Hawai'i: Volcano N.P. \n", + "4 19.4333-155.2833_1968 1968 1967-12-30 Hawai'i: Volcano N.P. \n", + "\n", + " circle_id Ecosys_circle specific_circle_ecosystem Nlcd_code_circle \\\n", + "0 8e3wd3w NaN NaN NaN \n", + "1 8e3wd3w NaN NaN NaN \n", + "2 8e3wd3w NaN NaN NaN \n", + "3 8e3wd3w NaN NaN NaN \n", + "4 8e3wd3w NaN NaN NaN \n", "\n", " macro_circle_ecosystem circle_elevation circle_lat circle_lon \\\n", "0 NaN 1228.18 19.4333 -155.2833 \n", @@ -319,47 +1018,51 @@ "3 NaN 1228.18 19.4333 -155.2833 \n", "4 NaN 1228.18 19.4333 -155.2833 \n", "\n", - " circle_min_temp circle_max_temp noaa_id specific_station_ecosystem \\\n", - "0 NaN NaN USC00511303 NaN \n", - "1 NaN NaN USC00511303 NaN \n", - "2 54.0 66.0 US1HIHI0013 NaN \n", - "3 54.0 66.0 US1HIHI0071 NaN \n", - "4 54.0 66.0 USC00514563 NaN \n", - "\n", - " macro_station_ecosystem noaa_elevation noaa_lat noaa_lon noaa_min_temp \\\n", - "0 NaN 1210.40 19.4297 -155.2561 100.0 \n", - "1 NaN 1210.40 19.4297 -155.2561 117.0 \n", - "2 NaN 1059.20 19.4391 -155.2156 NaN \n", - "3 NaN 1194.80 19.4414 -155.2487 NaN \n", - "4 NaN 1079.87 19.4094 -155.2608 NaN \n", - "\n", - " noaa_max_temp \n", - "0 161.0 \n", - "1 189.0 \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN " + " circle_min_temp circle_max_temp noaa_id Ecosys_station \\\n", + "0 NaN NaN USC00511303 NaN \n", + "1 NaN NaN 
USC00511303 NaN \n", + "2 12.222222 18.888889 US1HIHI0013 NaN \n", + "3 12.222222 18.888889 US1HIHI0071 NaN \n", + "4 12.222222 18.888889 USC00514563 NaN \n", + "\n", + " specific_station_ecosystem Nlcd_code_station macro_station_ecosystem \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " noaa_elevation noaa_lat noaa_lon noaa_min_temp noaa_max_temp \n", + "0 1210.40 19.4297 -155.2561 100.0 161.0 \n", + "1 1210.40 19.4297 -155.2561 117.0 189.0 \n", + "2 1059.20 19.4391 -155.2156 NaN NaN \n", + "3 1194.80 19.4414 -155.2487 NaN NaN \n", + "4 1079.87 19.4094 -155.2608 NaN NaN " ] }, - "execution_count": 5, + "execution_count": 232, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pulling out temperature data and renaming columns for clarification\n", - "temp_df_raw = raw_data[['count_year',\n", + "temp_df_raw = raw_data[['ui','count_year', 'count_date',\n", " 'circle_name', \n", " 'circle_id',\n", + " 'Ecosys_circle',\n", " 'Usgsid_sys_circle',\n", + " 'Nlcd_code_circle',\n", " 'Nlcd_circle',\n", " 'circle_elev',\n", " 'lat',\n", " 'lon',\n", - " 'min_temp',\n", - " 'max_temp',\n", + " 'min_temp_metric',\n", + " 'max_temp_metric',\n", " 'id',\n", + " 'Ecosys_station',\n", " 'Usgsid_sys_station',\n", + " 'Nlcd_code_station',\n", " 'Nlcd_station',\n", " 'elevation',\n", " 'latitude',\n", @@ -376,8 +1079,8 @@ " 'Nlcd_circle':'macro_circle_ecosystem',\n", " 'lat':'circle_lat',\n", " 'lon':'circle_lon',\n", - " 'min_temp':'circle_min_temp',\n", - " 'max_temp':'circle_max_temp',\n", + " 'min_temp_metric':'circle_min_temp',\n", + " 'max_temp_metric':'circle_max_temp',\n", " 'temp_unit':'circle_temp_unit',\n", " 'id':'noaa_id',\n", " 'Usgsid_sys_station':'specific_station_ecosystem',\n", @@ -394,6 +1097,24 @@ "temp_df.head()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Convert NOAA temperatures from tenths of degrees C to degrees Celsius \n", + 
"Reference: https://docs.opendata.aws/noaa-ghcn-pds/readme.html" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['noaa_min_temp'] = temp_df['noaa_min_temp'] / 10\n", + "temp_df['noaa_max_temp'] = temp_df['noaa_max_temp'] / 10" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -403,12 +1124,73 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 235, "metadata": {}, "outputs": [], "source": [ "temp_df['circle_average_temp'] = temp_df[['circle_min_temp', 'circle_max_temp']].mean(axis=1)\n", - "temp_df['noaa_average_temp'] = temp_df[['noaa_min_temp', 'noaa_max_temp']].mean(axis=1)" + "temp_df['noaa_average_temp'] = temp_df[['noaa_min_temp', 'noaa_max_temp']].mean(axis=1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Calculate whether the circle and station share ecosystem classifiers " + ] + }, + { + "cell_type": "code", + "execution_count": 236, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['same_ecosys'] = temp_df.Ecosys_circle.astype('Int64') == temp_df.Ecosys_station.astype('Int64')\n", + "temp_df['same_nlcd'] = temp_df.Nlcd_code_circle.astype('Int64') == temp_df.Nlcd_code_station.astype('Int64')" + ] + }, + { + "cell_type": "code", + "execution_count": 237, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 253761\n", + "True 155928\n", + "Name: same_ecosys, dtype: Int64" + ] + }, + "execution_count": 237, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['same_ecosys'].value_counts(dropna = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 238, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True 247630\n", + "False 166281\n", + "Name: same_nlcd, dtype: Int64" + ] + }, + "execution_count": 238, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['same_nlcd'].value_counts(dropna = True)" ] 
}, { @@ -434,7 +1216,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 239, "metadata": {}, "outputs": [], "source": [ @@ -473,7 +1255,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 240, "metadata": {}, "outputs": [], "source": [ @@ -490,7 +1272,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 241, "metadata": {}, "outputs": [], "source": [ @@ -512,6 +1294,23 @@ "## Missing Data" ] }, + { + "cell_type": "code", + "execution_count": 242, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of rows in the dataset is: 756378\n" + ] + } + ], + "source": [ + "print(f\"The number of rows in the dataset is: {temp_df.shape[0]}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -521,7 +1320,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 243, "metadata": {}, "outputs": [ { @@ -542,20 +1341,89 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Counting number of temperature measuremnts that are missing" + "#### The number of unique CBC counts in the data\n", + "A CBC count in a given year will appear multiple times in the dataset, once for each reference station it is matched with." + ] + }, + { + "cell_type": "code", + "execution_count": 244, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Number of unique cbc counts: 80765\n" + ] + } + ], + "source": [ + "print(f\" Number of unique cbc counts: {temp_df['ui'].nunique()}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The number of unique NOAA stations in the data\n", + "A NOAA station used as a reference for volunteer reported data might appear multiple times over the years as a count is repeated each year." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 331, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'noaa_id'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m~/.pyenv/versions/funhacks371/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2645\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2646\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2647\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'noaa_id'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\" Number of unique noaa stations: {temp_df['noaa_id'].nunique()}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.pyenv/versions/funhacks371/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2798\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2799\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2800\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2801\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2802\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.pyenv/versions/funhacks371/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2646\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2647\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2648\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2649\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2650\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + 
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'noaa_id'" + ] + } + ], + "source": [ + "print(f\" Number of unique noaa stations: {temp_df['noaa_id'].nunique()}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Counting number of temperature measuremnts that are missing\n", + "Note:, these numbers represent repeats of the same circles multiple times " ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 246, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of missing CBC Min Temps : 26942\n", - "Number of missing CBC Max Temps : 26960\n", + "Number of missing CBC Min Temps : 26996\n", + "Number of missing CBC Max Temps : 27349\n", "Number of missing NOAA Min Temps : 675297\n", "Number of missing NOAA Max Temps : 675285\n" ] @@ -570,360 +1438,3454 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 247, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of rows missing both Min and Max Temps from CBC : 26938\n", + "Number of rows missing both Min and Max Temps from NOAA: 675076\n", + "\n", + "Number of rows missing all temperature data : 7668\n" + ] + } + ], + "source": [ + "print(f\"Number of rows missing both Min and Max Temps from CBC : {temp_df.loc[temp_df['circle_min_temp'].isna() & temp_df['circle_max_temp'].isna()].shape[0]}\")\n", + "print(f\"Number of rows missing both Min and Max Temps from NOAA: {temp_df.loc[temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'].isna()].shape[0]}\")\n", + "print()\n", + "print(f\"Number of rows missing all temperature data : 
{temp_df.loc[temp_df['circle_min_temp'].isna() & temp_df['circle_max_temp'].isna() & temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'].isna()].shape[0]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "metadata": {}, + "outputs": [], + "source": [ + "# Add this data into the dataframe \n", + "temp_df['cbc_temp_present'] = np.where(temp_df['circle_min_temp'].isna() & temp_df['circle_max_temp'], False, True)\n", + "temp_df['station_temp_present'] = np.where(temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'], False, True)\n", + "\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "-----" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Out of Bounds Data " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Temperature Data\n", + "There are a number of outliers in the data set that could heavily skew the analysis. Any rows with a temperature outside the record minimum or maximum temperature for the United States will be dropped.\n", + "\n", + "To be conservative in dropping data, we'll use only one max and one min for the entire country rather than by state or other locality. 
Additionally we'll check by each min/max temp for circles and stations to get an idea on if one is more error prone than another.\n", + "\n", + "Data: https://en.wikipedia.org/wiki/U.S._state_and_territory_temperature_extremes" + ] + }, + { + "cell_type": "code", + "execution_count": 250, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of CBC rows missing both Min and Max Temps : 26884\n", - "Number of NOAA rows missing both Min and Max Temps : 675076\n", + "Number of CBC measurments outside max : 0\n", + "Number of NOAA measurments outside max : 1\n", + "\n", + "Number of CBC measurments outside min : 0\n", + "Number of NOAA measurments outside min : 0\n", "\n", - "Number of rows missing all temperature data : 7621\n" + "Number of NOAA stations with both outside : 0\n" + ] + } + ], + "source": [ + "# Creating variables for each drop condition\n", + "circle_over_max_temp = temp_df.loc[temp_df[\"circle_max_temp\"]>max_temp_check]\n", + "circle_under_min_temp = temp_df.loc[temp_df[\"circle_min_temp\"]max_temp_check]\n", + "noaa_under_min_temp = temp_df.loc[temp_df[\"noaa_min_temp\"] max_temp_check) & (temp_df[\"noaa_min_temp\"] < min_temp_check)].shape[0]}')\n", + "\n", + "# Setting list of indices to drop\n", + "index_drop_list = list(circle_over_max_temp.index) + list(circle_under_min_temp.index) + list(noaa_over_max_temp.index) + list(noaa_under_min_temp.index)\n", + "\n", + "# Dropping All out of bout roundsRows\n", + "temp_df.drop(index_drop_list, inplace=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Distance Data" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of rows dropped outside of distance threshold: 558454\n" ] } ], "source": [ - "print(f\"Number of CBC rows missing both Min and Max Temps : {temp_df.loc[temp_df['circle_min_temp'].isna() & 
temp_df['circle_max_temp'].isna()].shape[0]}\")\n", - "print(f\"Number of NOAA rows missing both Min and Max Temps : {temp_df.loc[temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'].isna()].shape[0]}\")\n", - "print()\n", - "print(f\"Number of rows missing all temperature data : {temp_df.loc[temp_df['circle_min_temp'].isna() & temp_df['circle_max_temp'].isna() & temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'].isna()].shape[0]}\")" + "# Dropping rows with distance differences larger then set threshold\n", + "temp_df.drop(temp_df[temp_df['distance_diff'] > distance_threshold].index, inplace=True)\n", + "print(f'Number of rows dropped outside of distance threshold: {temp_df.shape[0]}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Elevation Data" + ] + }, + { + "cell_type": "code", + "execution_count": 252, + "metadata": {}, + "outputs": [], + "source": [ + "# Dropping rows with circles and stations that are over the elevation threshold\n", + "temp_df.drop(temp_df[temp_df['elevation_diff'] > elevation_threshold].index, inplace=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Examine the Data Remaining for Analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Assessing CBC Range" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Checking to see how many CBC Circle Min Temperatures records are within the bounds of the NOAA Station records" + ] + }, + { + "cell_type": "code", + "execution_count": 253, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['min_bw_noaa'] = np.where(temp_df['circle_min_temp'].isna(), np.NaN, temp_df['circle_min_temp'].between(temp_df['noaa_min_temp'], temp_df['noaa_max_temp']))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 254, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 375534\n", + "1.0 21404\n", + "NaN 15167\n", + "Name: min_bw_noaa, 
dtype: int64" + ] + }, + "execution_count": 254, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['min_bw_noaa'].value_counts(dropna = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 255, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of rows (circle-station pairing) where Min temperature is in the bounds of the corresponding NOAA station: 21404.0\n", + "Number of rows (circle-station pairing) where Min temperature is not in the bounds of the corresponding NOAA station: 390701.0\n", + "\n", + "5.0% of circle min temp's lay between\n" + ] + } + ], + "source": [ + "# Counting number of circles that are true\n", + "min_temp_true = temp_df['min_bw_noaa'].sum()\n", + "min_temp_false = temp_df.shape[0] - temp_df['min_bw_noaa'].sum()\n", + "print(f\"Number of rows (circle-station pairing) where Min temperature is in the bounds of the corresponding NOAA station: {temp_df['min_bw_noaa'].sum()}\")\n", + "print(f\"Number of rows (circle-station pairing) where Min temperature is not in the bounds of the corresponding NOAA station: {temp_df.shape[0] - temp_df['min_bw_noaa'].sum()}\")\n", + "print()\n", + "print(f\"{round((min_temp_true/temp_df.shape[0])*100)}% of circle min temp's lay between\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Checking to see how many CBC Circle Max Temperatures records are within the bounds of the NOAA Station records" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['max_bw_noaa'] = np.where(temp_df['circle_max_temp'].isna(), np.NaN, temp_df['circle_max_temp'].between(temp_df['noaa_min_temp'], temp_df['noaa_max_temp']))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 377489\n", + "1.0 19262\n", + "NaN 15354\n", + "Name: max_bw_noaa, dtype: 
int64" + ] + }, + "execution_count": 259, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['max_bw_noaa'].value_counts(dropna = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of rows (circle-station pairing) where temperature is in the bounds of the corresponding NOAA station: 19262.0\n", + "Number of rows (circle-station pairing) where temperature is not in the bounds of the corresponding NOAA station: 392843.0\n", + "\n", + "5.0% of stations lay between\n" + ] + } + ], + "source": [ + "# Counting number of circles that are true\n", + "max_temp_true = temp_df['max_bw_noaa'].sum()\n", + "max_temp_false = temp_df.shape[0] - sum(temp_df['max_bw_noaa'])\n", + "print(f\"Number of rows (circle-station pairing) where temperature is in the bounds of the corresponding NOAA station: {temp_df['max_bw_noaa'].sum()}\")\n", + "print(f\"Number of rows (circle-station pairing) where temperature is not in the bounds of the corresponding NOAA station: {temp_df.shape[0] - temp_df['max_bw_noaa'].sum()}\")\n", + "print()\n", + "print(f\"{round((max_temp_true/temp_df.shape[0])*100)}% of stations lay between\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## Temperature Measurement Goodness\n", + "\n", + "temp_metric = sqrt( (noaa_min_temp - circle_min_temp)^2 + (noaa_max_temp - circle_max_temp)^2 )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Goodness Metric\n", + "temp_goodness = sqrt( (noaa_min_temp - circle_min_temp)^2 + (noaa_max_temp - circle_max_temp)^2 )" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['temp_goodness'] = round(np.sqrt(((temp_df['noaa_min_temp'] - temp_df['circle_min_temp'])**2) + ((temp_df['noaa_max_temp'] - temp_df['circle_max_temp'])**2)),2)\n", + 
"\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 34569.000000\n", + "mean 4.361014\n", + "std 4.005240\n", + "min 0.000000\n", + "25% 1.730000\n", + "50% 3.400000\n", + "75% 5.810000\n", + "max 66.660000\n", + "Name: temp_goodness, dtype: float64" + ] + }, + "execution_count": 261, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['temp_goodness'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 262, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAXuklEQVR4nO3dfZBd9X3f8fenItgYPwhMumUkTaXUij3YxDbeAh6nmQVaENhj8YfjgaFFdjXRTIMdp6VjQzItU9vM4DaEGGrTUY0KZDQIQpxIYxNjFXPHk5nwaDDiwYQNyEYasGJL4K4fcOR8+8f9qb2Wd7Xae1d390bv18ydPed7fuec72Eu+uw59+w9qSokSUe3f7TQDUiSFp5hIEkyDCRJhoEkCcNAkgQcs9AN9Oukk06qlStX9rXuD3/4Q44//vj5bWgIRrHvUewZ7HuYRrFnGN2+H3744e9V1S8fXB/ZMFi5ciUPPfRQX+t2Oh0mJibmt6EhGMW+R7FnsO9hGsWeYXT7TvLt6epeJpIkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEiP8F8iD2LH7ZT50xZf7WnfnNe+d524kaeF5ZiBJMgwkSYaBJAnDQJKEYSBJ4jDCIMmmJHuSPH5Q/aNJvpXkiST/tad+ZZLJJE8nOa+nvqbVJpNc0VNfleT+Vr89ybHzdXCSpMNzOGcGNwNregtJzgLWAm+vqrcCf9DqpwAXAW9t63w+yZIkS4DPAecDpwAXt7EAnwGuq6o3AfuA9YMelCRpbmYNg6r6OrD3oPK/A66pqlfamD2tvhbYUlWvVNVzwCRwentNVtWzVfVTYAuwNkmAs4E72/q3ABcOeEySpDnq94/OfhX4F0muBn4C/MeqehBYBtzXM25XqwE8f1D9DOCNwEtVtX+a8b8gyQZgA8DY2BidTqev5seOg8tP3T/7wGn0u8/5MDU1taD778co9gz2PUyj2DOMbt8z6TcMjgFOBM4E/jlwR5JfmbeuZlBVG4GNAOPj49Xv80dv2LyVa3f0d+g7L+lvn/NhFJ+5Ooo9g30P0yj2DKPb90z6DYNdwBerqoAHkvw9cBKwG1jRM255qzFD/fvA0iTHtLOD3vGSpCHp99bSPwfOAkjyq8CxwPeAbcBFSV6VZBWwGngAeBBY3e4cOpbuh8zbWpjcC3ygbXcdsLXfg5Ek9WfWM4MktwETwElJdgFXAZ
uATe12058C69o/7E8kuQN4EtgPXFZVP2vb+QhwN7AE2FRVT7RdfALYkuTTwCPATfN4fJKkwzBrGFTVxTMs+tczjL8auHqa+l3AXdPUn6V7t5EkaYH4F8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAksRhhEGSTUn2tKeaHbzs8iSV5KQ2nyTXJ5lM8liS03rGrkvyTHut66m/K8mOts71STJfBydJOjyHc2ZwM7Dm4GKSFcC5wHd6yufTfe7xamADcGMbeyLdx2WeQfepZlclOaGtcyPwWz3r/cK+JElH1qxhUFVfB/ZOs+g64ONA9dTWArdW133A0iQnA+cB26tqb1XtA7YDa9qy11fVfe0ZyrcCFw52SJKkuZr1GcjTSbIW2F1V3zzoqs4y4Pme+V2tdqj6rmnqM+13A90zDsbGxuh0Ov20z9hxcPmp+/tat999zoepqakF3X8/RrFnsO9hGsWeYXT7nsmcwyDJa4Dfo3uJaKiqaiOwEWB8fLwmJib62s4Nm7dy7Y6+cpCdl/S3z/nQ6XTo95gXyij2DPY9TKPYM4xu3zPp526ifwasAr6ZZCewHPhGkn8C7AZW9Ixd3mqHqi+fpi5JGqI5h0FV7aiqf1xVK6tqJd1LO6dV1YvANuDSdlfRmcDLVfUCcDdwbpIT2gfH5wJ3t2U/SHJmu4voUmDrPB2bJOkwHc6tpbcBfwW8OcmuJOsPMfwu4FlgEvifwG8DVNVe4FPAg+31yVajjflCW+dvgL/o71AkSf2a9cJ5VV08y/KVPdMFXDbDuE3ApmnqDwFvm60PSdKR418gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kSh/eks01J9iR5vKf235J8K8ljSf4sydKeZVcmmUzydJLzeuprWm0yyRU99VVJ7m/125McO58HKEma3eGcGdwMrDmoth14W1X9GvDXwJUASU4BLgLe2tb5fJIlSZYAnwPOB04BLm5jAT4DXFdVbwL2AYd6rKYk6QiYNQyq6uvA3oNqX62q/W32PmB5m14LbKmqV6rqObrPNT69vSar6tmq+imwBVibJMDZwJ1t/VuACwc8JknSHM36DOTD8G+B29v0MrrhcMCuVgN4/qD6GcAbgZd6gqV3/C9IsgHYADA2Nkan0+mr4bHj4PJT988+cBr97nM+TE1NLej++zGKPYN9D9Mo9gyj2/dMBgqDJL8P7Ac2z087h1ZVG4GNAOPj4zUxMdHXdm7YvJVrd/R36Dsv6W+f86HT6dDvMS+UUewZ7HuYRrFnGN2+Z9J3GCT5EPA+4JyqqlbeDazoGba81Zih/n1gaZJj2tlB73hJ0pD0dWtpkjXAx4H3V9WPehZtAy5K8qokq4DVwAPAg8DqdufQsXQ/ZN7WQuRe4ANt/XXA1v4ORZLUr8O5tfQ24K+ANyfZlWQ98N+B1wHbkzya5H8AVNUTwB3Ak8BXgMuq6mftt/6PAHcDTwF3tLEAnwD+Q5JJup8h3DSvRyhJmtWsl4mq6uJpyjP+g11VVwNXT1O/C7hrmvqzdO82kiQtEP8CWZJkGEiSDANJEoaBJAnDQJKEYSBJwjCQJGEYSJKYn28tPaqsvOLLfa+785r3zmMnkjR/PDOQJBkGkiTDQJKEYSBJwjCQJGEYSJI4vIfbbEqyJ8njPbUTk2xP8kz7eUKrJ8n1SSaTPJbktJ511rXxzyRZ11N/V5IdbZ3rk2S+D1KSdGiHc2ZwM7DmoNoVwD1VtRq4p80DnE/3UZergQ3AjdAND+Aq4Ay6D7K56kCAtDG/1bPewfuSJB1hs4ZBVX0d2HtQeS1wS5u+Bbiwp35rdd1H92H3JwPnAduram9V7QO2A2vastdX1X3teci39mxLkjQk/f4F8lhVvdCmXwTG2vQy4Pmecbta7VD1XdPUp5
VkA90zDsbGxuh0Ov01fxxcfur+vtYdRL/9HjA1NTXwNoZtFHsG+x6mUewZRrfvmQz8dRRVVUlqPpo5jH1tBDYCjI+P18TERF/buWHzVq7dMfxv4th5ycRA63c6Hfo95oUyij2DfQ/TKPYMo9v3TPq9m+i77RIP7eeeVt8NrOgZt7zVDlVfPk1dkjRE/YbBNuDAHUHrgK099UvbXUVnAi+3y0l3A+cmOaF9cHwucHdb9oMkZ7a7iC7t2ZYkaUhmvVaS5DZgAjgpyS66dwVdA9yRZD3wbeCDbfhdwAXAJPAj4MMAVbU3yaeAB9u4T1bVgQ+lf5vuHUvHAX/RXpKkIZo1DKrq4hkWnTPN2AIum2E7m4BN09QfAt42Wx+SpCPHv0CWJBkGkiTDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQGDIMk/z7JE0keT3JbklcnWZXk/iSTSW5Pcmwb+6o2P9mWr+zZzpWt/nSS8wY7JEnSXPUdBkmWAb8DjFfV24AlwEXAZ4DrqupNwD5gfVtlPbCv1a9r40hySlvvrcAa4PNJlvTblyRp7ga9THQMcFySY4DXAC8AZwN3tuW3ABe26bVtnrb8nCRp9S1V9UpVPUf3+cmnD9iXJGkOZn0G8kyqaneSPwC+A/wY+CrwMPBSVe1vw3YBy9r0MuD5tu7+JC8Db2z1+3o23bvOz0myAdgAMDY2RqfT6av3sePg8lP3zz5wnvXb7wFTU1MDb2PYRrFnsO9hGsWeYXT7nknfYZDkBLq/1a8CXgL+hO5lniOmqjYCGwHGx8drYmKir+3csHkr1+7o+9D7tvOSiYHW73Q69HvMC2UUewb7HqZR7BlGt++ZDHKZ6F8Cz1XV31bV3wFfBN4DLG2XjQCWA7vb9G5gBUBb/gbg+731adaRJA3BIGHwHeDMJK9p1/7PAZ4E7gU+0MasA7a26W1tnrb8a1VVrX5Ru9toFbAaeGCAviRJczTIZwb3J7kT+AawH3iE7iWcLwNbkny61W5qq9wE/HGSSWAv3TuIqKonktxBN0j2A5dV1c/67UuSNHcDXTivqquAqw4qP8s0dwNV1U+A35xhO1cDVw/SiySpf/4FsiTJMJAkGQaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkMWAYJFma5M4k30ryVJJ3JzkxyfYkz7SfJ7SxSXJ9kskkjyU5rWc769r4Z5Ksm3mPkqQjYdAzg88CX6mqtwBvB54CrgDuqarVwD1tHuB8us83Xg1sAG4ESHIi3aelnUH3CWlXHQgQSdJw9B0GSd4A/AbtGcdV9dOqeglYC9zSht0CXNim1wK3Vtd9wNIkJwPnAduram9V7QO2A2v67UuSNHepqv5WTN4BbKT7IPu3Aw8DHwN2V9XSNibAvqpamuRLwDVV9Zdt2T3AJ4AJ4NVV9elW/0/Aj6vqD6bZ5wa6ZxWMjY29a8uWLX31vmfvy3z3x32tOpBTl71hoPWnpqZ47WtfO0/dDMco9gz2PUyj2DOMbt9nnXXWw1U1fnD9mAG2eQxwGvDRqro/yWf5/5eEAKiqStJf2kyjqjbSDSDGx8drYmKir+3csHkr1+4Y5ND7s/OSiYHW73Q69HvMC2UUewb7HqZR7BlGt++ZDPKZwS5gV1Xd3+bvpBsO322Xf2g/97Tlu4EVPesvb7WZ6pKkIek7DKrqReD5JG9upXPoXjLaBhy4I2gdsLVNbwMubXcVnQm8XFUvAHcD5yY5oX1wfG6rSZKGZNBrJR8FNic5FngW+DDdgLkjyXrg28AH29i7gAuASeBHbSxVtTfJp4AH27hPVtXeAfuSJM3BQGFQVY8Cv/BBBN2zhIPHFnDZDNvZBGwapBdJUv/8C2RJkmEgSTIMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJA
wDSRKGgSSJwZ9noDlYecWXB1r/5jXHz1MnkvTzBj4zSLIkySPtgfckWZXk/iSTSW5vD74hyava/GRbvrJnG1e2+tNJzhu0J0nS3MzHZaKPAU/1zH8GuK6q3gTsA9a3+npgX6tf18aR5BTgIuCtwBrg80mWzENfkqTDNFAYJFkOvBf4QpsPcDZwZxtyC3Bhm17b5mnLz2nj1wJbquqVqnqO7mMxTx+kL0nS3Az6mcEfAR8HXtfm3wi8VFX72/wuYFmbXgY8D1BV+5O83MYvA+7r2WbvOj8nyQZgA8DY2BidTqevpseOg8tP3T/7wEVmamqq72NeKKPYM9j3MI1izzC6fc+k7zBI8j5gT1U9nGRi/lqaWVVtBDYCjI+P18REf7u9YfNWrt0xep+d37zmePo95oXS6XRGrmew72EaxZ5hdPueySD/Ir4HeH+SC4BXA68HPgssTXJMOztYDuxu43cDK4BdSY4B3gB8v6d+QO86kqQh6Pszg6q6sqqWV9VKuh8Af62qLgHuBT7Qhq0DtrbpbW2etvxrVVWtflG722gVsBp4oN++JElzdySulXwC2JLk08AjwE2tfhPwx0kmgb10A4SqeiLJHcCTwH7gsqr62RHoS5I0g3kJg6rqAJ02/SzT3A1UVT8BfnOG9a8Grp6PXiRJc+fXUUiSDANJkmEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEgOEQZIVSe5N8mSSJ5J8rNVPTLI9yTPt5wmtniTXJ5lM8liS03q2ta6NfybJupn2KUk6MgY5M9gPXF5VpwBnApclOQW4ArinqlYD97R5gPPpPt94NbABuBG64QFcBZxB9wlpVx0IEEnScPQdBlX1QlV9o03/H+ApYBmwFrilDbsFuLBNrwVura77gKVJTgbOA7ZX1d6q2gdsB9b025ckae7m5RnISVYC7wTuB8aq6oW26EVgrE0vA57vWW1Xq81Un24/G+ieVTA2Nkan0+mr37Hj4PJT9/e17kKamprq+5gXyij2DPY9TKPYM4xu3zMZOAySvBb4U+B3q+oHSf7fsqqqJDXoPnq2txHYCDA+Pl4TExN9beeGzVu5dse85OBQ3bzmePo95oXS6XRGrmew72EaxZ5hdPueyUB3EyX5JbpBsLmqvtjK322Xf2g/97T6bmBFz+rLW22muiRpSAa5myjATcBTVfWHPYu2AQfuCFoHbO2pX9ruKjoTeLldTrobODfJCe2D43NbTZI0JINcK3kP8G+AHUkebbXfA64B7kiyHvg28MG27C7gAmAS+BHwYYCq2pvkU8CDbdwnq2rvAH1Jkuao7zCoqr8EMsPic6YZX8BlM2xrE7Cp314kSYMZvU9Rj2I7dr/Mh674cl/r7rzmvfPcjaR/SPw6CkmSYSBJMgwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEn43URHjZV9fqcR+L1G0tHAMwNJkmEgSTIMJEksos8MkqwBPgssAb5QVdcscEtqBvm84eY1x89jJ5KOlEVxZpBkCfA54HzgFODiJKcsbFeSdPRYLGcGpwOTVfUsQJItwFrgyQXtSgMb5Olsg/IuKOnwLZYwWAY83zO/Czjj4EFJNgAb2uxUkqf73N9JwPf6XHfB/M4I9r2QPeczA60+cv+tm1HsexR7htHt+59OV1wsYXBYqmojsHHQ7SR5qKrG56GloRrFvkexZ7DvYRrFnmF0+57JovjMANgNrOiZX95qkqQhWCxh8CCwOsmqJMcCFwHbFrgnSTpqLIrLRFW1P8lHgLvp3lq6qaqeOIK7HPhS0wIZxb5HsWew72EaxZ5hdPueVqpqoXuQJC2wxXKZSJK0gAwDSdLRFQZJ1iR5OslkkisWup+ZJNmUZE+Sx3tqJybZnuSZ9vOEhexxOklWJLk3yZNJnkjysVZftL0neXWSB5J8s/X8X1p9VZL723vl9nZjw6KTZE
mSR5J8qc0v+r6T7EyyI8mjSR5qtUX7HgFIsjTJnUm+leSpJO9e7D3P1VETBiP2lRc3A2sOql0B3FNVq4F72vxisx+4vKpOAc4ELmv/jRdz768AZ1fV24F3AGuSnAl8Briuqt4E7APWL2CPh/Ix4Kme+VHp+6yqekfPffqL+T0C3e9N+0pVvQV4O93/5ou957mpqqPiBbwbuLtn/krgyoXu6xD9rgQe75l/Gji5TZ8MPL3QPR7GMWwF/tWo9A68BvgG3b9+/x5wzHTvncXyovv3OPcAZwNfAjIife8ETjqotmjfI8AbgOdoN9yMQs/9vI6aMwOm/8qLZQvUSz/GquqFNv0iMLaQzcwmyUrgncD9LPLe26WWR4E9wHbgb4CXqmp/G7JY3yt/BHwc+Ps2/0ZGo+8Cvprk4fYVM7C43yOrgL8F/le7JPeFJMezuHues6MpDP7BqO6vIov2nuAkrwX+FPjdqvpB77LF2HtV/ayq3kH3N+3TgbcscEuzSvI+YE9VPbzQvfTh16vqNLqXbC9L8hu9Cxfhe+QY4DTgxqp6J/BDDroktAh7nrOjKQxG/SsvvpvkZID2c88C9zOtJL9ENwg2V9UXW3kkeq+ql4B76V5eWZrkwB9lLsb3ynuA9yfZCWyhe6nosyz+vqmq3e3nHuDP6AbwYn6P7AJ2VdX9bf5OuuGwmHues6MpDEb9Ky+2Aeva9Dq61+MXlSQBbgKeqqo/7Fm0aHtP8stJlrbp4+h+xvEU3VD4QBu2qHoGqKorq2p5Va2k+17+WlVdwiLvO8nxSV53YBo4F3icRfweqaoXgeeTvLmVzqH79fqLtue+LPSHFsN8ARcAf033mvDvL3Q/h+jzNuAF4O/o/laynu714HuAZ4D/DZy40H1O0/ev0z1Vfgx4tL0uWMy9A78GPNJ6fhz4z63+K8ADwCTwJ8CrFrrXQxzDBPClUei79ffN9nriwP+Hi/k90vp7B/BQe5/8OXDCYu95ri+/jkKSdFRdJpIkzcAwkCQZBpIkw0CShGEgScIwkCRhGEiSgP8L/hcrrhlezLQAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "temp_df['temp_goodness'].hist(bins = 20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Catagories\n", + "Values in catagories can be changed and then applied to dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "metadata": {}, + "outputs": [], + "source": [ + "# Function to assign grade scores\n", + "def assign_grade(metric_score):\n", + " if metric_score <= excellent_score:\n", + " return 'excellent'\n", + " elif metric_score <= good_score:\n", + " return 'good'\n", + " elif metric_score <= fair_score:\n", + " return 'fair'\n", + " else:\n", + " return 'poor'" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "metadata": {}, + "outputs": [], + "source": [ + "# Applying the scores\n", + "temp_df['goodness_grade'] = temp_df['temp_goodness'].apply(lambda metric_score: assign_grade(metric_score))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Group by Circles Now For Final Counts \n", + "num - The number of stations for a circle for a count date\n", + "\n", + "num_notna - The number of stations that are not null for their temp measurments\n", + "\n", + "\n", + "The next sections for \"e_\" and \"n_\" are the same as abouve, except e is only stations for a circle that\n", + "are in the same ecosystem and n is stations with the same nlcd code.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "metadata": {}, + "outputs": [], + "source": [ + "def temp_calc(dfg):\n", + " is_na_max = dfg.max_bw_noaa.isna()\n", + " is_na_min = dfg.min_bw_noaa.isna()\n", + "\n", + " # there can be weirdness with boolean not being promoted to ints, so add zero\n", + " # Count the number of stations for this circle\n", + " # Station temp present is true for is both min and max are present in the \n", + " # station data\n", + " num = 
dfg.station_temp_present.size + 0\n", + " # Count the number of stations where both temp mesurments are not NA \n", + " num_notna = dfg.station_temp_present.size + 0\n", + " # Count the number of stations where the colunteer submitted average was between the NOAA min and max\n", + " if dfg.min_bw_noaa.isnull().all():\n", + " num_min_bw_noaa = np.NaN\n", + " else:\n", + " num_min_bw_noaa = dfg.min_bw_noaa.sum()\n", + " \n", + " if dfg.max_bw_noaa.isnull().all():\n", + " num_max_bw_noaa = np.NaN\n", + " else:\n", + " num_max_bw_noaa = dfg.max_bw_noaa.sum()\n", + " \n", + " \n", + " # Number of Stations with a 'each Classifyer'\n", + " num_excellent = dfg.loc[dfg.goodness_grade == 'excellent'].shape[0]\n", + " num_good = dfg.loc[dfg.goodness_grade == 'good'].shape[0]\n", + " num_fair = dfg.loc[dfg.goodness_grade == 'fair'].shape[0]\n", + " num_poor = dfg.loc[dfg.goodness_grade == 'poor'].shape[0]\n", + " \n", + " try: \n", + " goodness_mode = mode(dfg.goodness_grade.values.tolist())\n", + " except: \n", + " goodness_mode = np.NaN\n", + " \n", + " \n", + "\n", + " \n", + " ## BREAKDOWN By Ecosys \n", + " e = dfg.loc[dfg.same_ecosys]\n", + " # Count the number of stations for this circle\n", + " e_num = e.station_temp_present.size + 0\n", + " # Count the number of stations where both temp mesurments are not NA \n", + " e_num_notna = e.station_temp_present.size + 0\n", + " # Count the number of stations where the colunteer submitted average was between the NOAA min and max\n", + " if e.min_bw_noaa.isnull().all():\n", + " e_num_min_bw_noaa = np.NaN\n", + " else:\n", + " e_num_min_bw_noaa = e.min_bw_noaa.sum()\n", + " \n", + " if e.max_bw_noaa.isnull().all():\n", + " e_num_max_bw_noaa = np.NaN\n", + " else:\n", + " e_num_max_bw_noaa = e.max_bw_noaa.sum()\n", + " \n", + " # Number of Stations with a 'each Classifyer'\n", + " e_num_excellent = e.loc[dfg.goodness_grade == 'excellent'].shape[0]\n", + " e_num_good = e.loc[e.goodness_grade == 'good'].shape[0]\n", + " e_num_fair 
= e.loc[e.goodness_grade == 'fair'].shape[0]\n", + " e_num_poor = e.loc[e.goodness_grade == 'poor'].shape[0]\n", + " \n", + " try: \n", + " e_goodness_mode = mode(e.goodness_grade.values.tolist())\n", + " except: \n", + " e_goodness_mode = np.NaN\n", + " \n", + " \n", + " ## BREAKDOWN By NCLD \n", + " n = dfg.loc[dfg.same_nlcd]\n", + " # Count the number of stations for this circle\n", + " n_num = n.station_temp_present.size + 0\n", + " # Count the number of stations where both temp mesurments are not NA \n", + " n_num_notna = n.station_temp_present.size + 0\n", + " # Count the number of stations where the colunteer submitted average was between the NOAA min and max\n", + " if n.min_bw_noaa.isnull().all():\n", + " n_num_min_bw_noaa = np.NaN\n", + " else:\n", + " n_num_min_bw_noaa = n.min_bw_noaa.sum()\n", + " \n", + " if n.max_bw_noaa.isnull().all():\n", + " n_num_max_bw_noaa = np.NaN\n", + " else:\n", + " n_num_max_bw_noaa = n.max_bw_noaa.sum()\n", + " \n", + " # Number of Stations with a 'each Classifyer'\n", + " n_num_excellent = n.loc[dfg.goodness_grade == 'excellent'].shape[0]\n", + " n_num_good = n.loc[n.goodness_grade == 'good'].shape[0]\n", + " n_num_fair = n.loc[n.goodness_grade == 'fair'].shape[0]\n", + " n_num_poor = n.loc[n.goodness_grade == 'poor'].shape[0]\n", + " \n", + " try: \n", + " n_goodness_mode = mode(n.goodness_grade.values.tolist())\n", + " except: \n", + " n_goodness_mode = np.NaN\n", + " \n", + "# # havent figured out how to inlinse this yet ...\n", + " lowest_id = dfg.elevation_diff.idxmin() if (num > 0) else np.NaN\n", + "\n", + " try:\n", + " lowest_id_na = dfg.loc[~is_na_max, 'elevation_diff'].idxmin() if ((num_notna > 0) and dfg.loc[~is_na_max, 'elevation_diff'].shape[0] > 0) else np.NaN\n", + " except:\n", + " lowest_id_na = np.NaN\n", + " \n", + " \n", + " \n", + " return pd.Series({\n", + " 'num' : num,\n", + " 'num_notna' : num_notna,\n", + " 'num_min_bw_noaa' : num_min_bw_noaa,\n", + " 'num_max_bw_noaa' : num_max_bw_noaa,\n", + " 
'num_excellent' : num_excellent,\n", + " 'num_good' : num_good,\n", + " 'num_fair' : num_fair,\n", + " 'num_poor' : num_poor,\n", + " 'goodness_mode' : goodness_mode,\n", + " \n", + " 'e_num' : e_num,\n", + " 'e_num_notna' : e_num_notna,\n", + " 'e_num_min_bw_noaa' : e_num_min_bw_noaa,\n", + " 'e_num_max_bw_noaa' : e_num_max_bw_noaa,\n", + " 'e_num_excellent' : e_num_excellent,\n", + " 'e_num_good' : e_num_good,\n", + " 'e_num_fair' : e_num_fair,\n", + " 'e_num_poor' : e_num_poor,\n", + " 'e_goodness_mode' : e_goodness_mode,\n", + " \n", + " \n", + " 'n_num' : n_num,\n", + " 'n_num_notna' : n_num_notna,\n", + " 'n_num_min_bw_noaa' : n_num_min_bw_noaa,\n", + " 'n_num_max_bw_noaa' : n_num_max_bw_noaa,\n", + " 'n_num_excellent' : n_num_excellent,\n", + " 'n_num_good' : n_num_good,\n", + " 'n_num_fair' : n_num_fair,\n", + " 'n_num_poor' : n_num_poor,\n", + " 'n_goodness_mode' : n_goodness_mode,\n", + " \n", + "\n", + " 'p' : num_max_bw_noaa / num_notna if (num_notna > 0) else np.NaN,\n", + " 'e_p' : e_num_max_bw_noaa / e_num_notna if (e_num_notna > 0) else np.NaN,\n", + " 'n_p' : n_num_max_bw_noaa / n_num_notna if (n_num_notna > 0) else np.NaN,\n", + " \n", + " \n", + " # Goodness Closest \n", + " 'goodness_closest' : dfg.loc[dfg.distance_diff.idxmin(), 'goodness_grade'] if (num > 0) else pd.NA,\n", + " \n", + " # Max Closest\n", + " 'max_temp_closest' : dfg.loc[dfg.distance_diff.idxmin(), 'max_bw_noaa'] if (num > 0) else pd.NA,\n", + " 'max_temp_closest_value' : dfg.loc[dfg.distance_diff.idxmin(), 'noaa_max_temp'] if (num > 0) else pd.NA,\n", + " \n", + " 'max_temp_closest_notna' : dfg.loc[dfg.loc[~is_na_max, 'distance_diff'].idxmin(), 'max_bw_noaa'] if (num_notna > 0 and num_max_bw_noaa > 0 ) else pd.NA,\n", + " \n", + " # Min Closest\n", + " 'min_temp_closest' : dfg.loc[dfg.distance_diff.idxmin(), 'min_bw_noaa'] if (num > 0) else pd.NA,\n", + " 'min_temp_closest_value' : dfg.loc[dfg.distance_diff.idxmin(), 'noaa_min_temp'] if (num > 0) else pd.NA,\n", + " \n", + " 
'min_temp_closest_notna' : dfg.loc[dfg.loc[~is_na_min, 'distance_diff'].idxmin(), 'min_bw_noaa'] if (num_notna > 0 and num_min_bw_noaa > 0 ) else pd.NA,\n", + " \n", + " #Goodness Lowest\n", + " 'goodness_lowest' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'goodness_grade'],\n", + " \n", + " # Max Lowest \n", + " 'max_temp_lowest' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'max_bw_noaa'],\n", + " 'max_temp_lowest_value' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'noaa_max_temp'],\n", + " 'max_temp_lowest_notna' : pd.NA if pd.isna(lowest_id_na) else dfg.loc[lowest_id_na, 'max_bw_noaa'],\n", + " \n", + " # Min Losest\n", + " 'min_temp_lowest' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'min_bw_noaa'],\n", + " 'min_temp_lowest_value' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'noaa_min_temp'],\n", + " 'min_temp_lowest_notna' : pd.NA if pd.isna(lowest_id_na) else dfg.loc[lowest_id_na, 'min_bw_noaa'],\n", + " })\n" + ] + }, + { + "cell_type": "code", + "execution_count": 266, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df = temp_df.set_index(['circle_id', 'count_date', 'noaa_id']).sort_index()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "metadata": {}, + "outputs": [], + "source": [ + "g = temp_df.groupby(level=['circle_id', 'count_date'])" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 268, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The new fields used for Analysis\n", + "num - The number of stations for a circle for a count date \n", + "\n", + "num_notna - The number of stations that are not null for their max temp measurment \n", + "\n", + "num_min_bw_noaa, num_max_bw_noaa - The number of reference stations that bount the circle\n", + "reported min 
temp and max temp, respectively. \n", + "\n", + "num_excellent, num_good, num_fair, num_poor - The number of stations whose goodness \n", + "measure was in the categories excellent, good, fair, and poor, respectively \n", + "\n", + "goodness_mode - The goodness metric category that appears the most often for a circle \n", + "\n", + "num_min_bw_noaa - The number of stations where the CBC min was within NOAA bounds \n", + "num_max_bw_noaa - The number of stations where the CBC max was within NOAA bounds\n", + "\n", + "The next sections for \"e_\" and \"n_\" are the same as above, except e is only stations for a circle that\n", + "are in the same ecosystem and n is stations with the same nlcd code.\n", + "\n", + "p - The proportion of stations that reported a max temp within bounds for a circle\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 274, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute the fields used for Analysis\n", + "circle_data = g.apply(temp_calc)" + ] + }, + { + "cell_type": "code", + "execution_count": 275, + "metadata": {}, + "outputs": [], + "source": [ + "AGREEMENT_THRESHOLD = 0.75\n", + "AGREEMENT_THRESHOLD = max(AGREEMENT_THRESHOLD, 1 - AGREEMENT_THRESHOLD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Determine if there is Consensus among a Circle's Reference Stations\n", + "Using the AGREEMENT_THRESHOLD set above, determine whether enough of the stations for\n", + "a circle agree that the circle's max temp was within the NOAA bounds, based on that agreement threshold.\n", + "\n", + "Do this for all stations and stations within the same ecosys and nlcd code " + ] + }, + { + "cell_type": "code", + "execution_count": 276, + "metadata": {}, + "outputs": [], + "source": [ + "# if the stations couldn't agree (p ∈ [1-AGREEMENT_THRESHOLD, AGREEMENT_THRESHOLD]) or p is NaN, set the consensus to NaN as well\n", + "circle_data['consensus'] = np.where(((circle_data.p >= (1-AGREEMENT_THRESHOLD)) & (circle_data.p <= 
AGREEMENT_THRESHOLD)) | circle_data.p.isna(), \n", + " pd.NA, circle_data.p >= AGREEMENT_THRESHOLD)\n", + "\n", + "circle_data['e_consensus'] = np.where(((circle_data.e_p >= (1-AGREEMENT_THRESHOLD)) & (circle_data.e_p <= AGREEMENT_THRESHOLD)) | circle_data.e_p.isna(), \n", + " pd.NA, circle_data.e_p >= AGREEMENT_THRESHOLD)\n", + "\n", + "circle_data['n_consensus'] = np.where(((circle_data.n_p >= (1-AGREEMENT_THRESHOLD)) & (circle_data.n_p <= AGREEMENT_THRESHOLD)) | circle_data.n_p.isna(), \n", + " pd.NA, circle_data.n_p >= AGREEMENT_THRESHOLD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 277, + "metadata": {}, + "outputs": [], + "source": [ + "for c in ['consensus', 'e_consensus', 'n_consensus','max_temp_closest', 'max_temp_closest_notna', 'max_temp_lowest', 'max_temp_lowest_notna']:\n", + " circle_data[c] = circle_data[c].astype('boolean')\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 278, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uicount_yearcircle_nameEcosys_circlespecific_circle_ecosystemNlcd_code_circlemacro_circle_ecosystemcircle_elevationcircle_latcircle_loncircle_min_tempcircle_max_tempEcosys_stationspecific_station_ecosystemNlcd_code_stationmacro_station_ecosystemnoaa_elevationnoaa_latnoaa_lonnoaa_min_tempnoaa_max_tempcircle_average_tempnoaa_average_tempsame_ecosyssame_nlcddistance_diffelevation_diffcbc_temp_presentstation_temp_presentmin_bw_noaamax_bw_noaatemp_goodnessgoodness_grade
circle_idcount_datenoaa_id
87ymwsn1968-12-26USC0051913022.0833-159.6667_19691969Kaua'i: WaimeaNaNNaNNaNNaN1015.7422.0833-159.666720.00000025.555556NaNNaNNaNNaN1051.922.1167-159.6167NaNNaN22.777778NaN<NA><NA>6352.27864436.16TrueTrue0.00.0NaNpoor
87ysunb1971-12-27US1HIKI000421.9667-159.4333_19721972Kaua'i: Lihu'eNaNNaNNaNNaN142.9621.9667-159.433323.33333325.555556NaNNaNNaNNaN186.821.9249-159.4983NaNNaN24.444444NaN<NA><NA>8159.88975343.84TrueTrue0.00.0NaNpoor
US1HIKI001421.9667-159.4333_19721972Kaua'i: Lihu'eNaNNaNNaNNaN142.9621.9667-159.433323.33333325.555556NaNNaNNaNNaN131.721.9055-159.5150NaNNaN24.444444NaN<NA><NA>10834.61410711.26TrueTrue0.00.0NaNpoor
US1HIKI001821.9667-159.4333_19721972Kaua'i: Lihu'eNaNNaNNaNNaN142.9621.9667-159.433323.33333325.555556NaNNaNNaNNaN160.321.9075-159.5181NaNNaN24.444444NaN<NA><NA>10950.02715117.34TrueTrue0.00.0NaNpoor
USC0051000621.9667-159.4333_19721972Kaua'i: Lihu'eNaNNaNNaNNaN142.9621.9667-159.433323.33333325.555556NaNNaNNaNNaN107.021.9500-159.4333NaNNaN24.444444NaN<NA><NA>1857.47992135.96TrueTrue0.00.0NaNpoor
\n", + "
" + ], + "text/plain": [ + " ui count_year \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 22.0833-159.6667_1969 1969 \n", + "87ysunb 1971-12-27 US1HIKI0004 21.9667-159.4333_1972 1972 \n", + " US1HIKI0014 21.9667-159.4333_1972 1972 \n", + " US1HIKI0018 21.9667-159.4333_1972 1972 \n", + " USC00510006 21.9667-159.4333_1972 1972 \n", + "\n", + " circle_name Ecosys_circle \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 Kaua'i: Waimea NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 Kaua'i: Lihu'e NaN \n", + " US1HIKI0014 Kaua'i: Lihu'e NaN \n", + " US1HIKI0018 Kaua'i: Lihu'e NaN \n", + " USC00510006 Kaua'i: Lihu'e NaN \n", + "\n", + " specific_circle_ecosystem Nlcd_code_circle \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN NaN \n", + " US1HIKI0014 NaN NaN \n", + " US1HIKI0018 NaN NaN \n", + " USC00510006 NaN NaN \n", + "\n", + " macro_circle_ecosystem circle_elevation \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN 1015.74 \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN 142.96 \n", + " US1HIKI0014 NaN 142.96 \n", + " US1HIKI0018 NaN 142.96 \n", + " USC00510006 NaN 142.96 \n", + "\n", + " circle_lat circle_lon circle_min_temp \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 22.0833 -159.6667 20.000000 \n", + "87ysunb 1971-12-27 US1HIKI0004 21.9667 -159.4333 23.333333 \n", + " US1HIKI0014 21.9667 -159.4333 23.333333 \n", + " US1HIKI0018 21.9667 -159.4333 23.333333 \n", + " USC00510006 21.9667 -159.4333 23.333333 \n", + "\n", + " circle_max_temp Ecosys_station \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 25.555556 NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 25.555556 NaN \n", + " US1HIKI0014 25.555556 NaN \n", + " US1HIKI0018 25.555556 NaN \n", + " USC00510006 25.555556 NaN \n", + "\n", + " specific_station_ecosystem \\\n", + "circle_id count_date 
noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN \n", + " US1HIKI0014 NaN \n", + " US1HIKI0018 NaN \n", + " USC00510006 NaN \n", + "\n", + " Nlcd_code_station macro_station_ecosystem \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN NaN \n", + " US1HIKI0014 NaN NaN \n", + " US1HIKI0018 NaN NaN \n", + " USC00510006 NaN NaN \n", + "\n", + " noaa_elevation noaa_lat noaa_lon \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 1051.9 22.1167 -159.6167 \n", + "87ysunb 1971-12-27 US1HIKI0004 186.8 21.9249 -159.4983 \n", + " US1HIKI0014 131.7 21.9055 -159.5150 \n", + " US1HIKI0018 160.3 21.9075 -159.5181 \n", + " USC00510006 107.0 21.9500 -159.4333 \n", + "\n", + " noaa_min_temp noaa_max_temp \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN NaN \n", + " US1HIKI0014 NaN NaN \n", + " US1HIKI0018 NaN NaN \n", + " USC00510006 NaN NaN \n", + "\n", + " circle_average_temp noaa_average_temp \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 22.777778 NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 24.444444 NaN \n", + " US1HIKI0014 24.444444 NaN \n", + " US1HIKI0018 24.444444 NaN \n", + " USC00510006 24.444444 NaN \n", + "\n", + " same_ecosys same_nlcd distance_diff \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 6352.278644 \n", + "87ysunb 1971-12-27 US1HIKI0004 8159.889753 \n", + " US1HIKI0014 10834.614107 \n", + " US1HIKI0018 10950.027151 \n", + " USC00510006 1857.479921 \n", + "\n", + " elevation_diff cbc_temp_present \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 36.16 True \n", + "87ysunb 1971-12-27 US1HIKI0004 43.84 True \n", + " US1HIKI0014 11.26 True \n", + " US1HIKI0018 17.34 True \n", + " USC00510006 35.96 True \n", + "\n", + " station_temp_present min_bw_noaa 
\\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 True 0.0 \n", + "87ysunb 1971-12-27 US1HIKI0004 True 0.0 \n", + " US1HIKI0014 True 0.0 \n", + " US1HIKI0018 True 0.0 \n", + " USC00510006 True 0.0 \n", + "\n", + " max_bw_noaa temp_goodness goodness_grade \n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 0.0 NaN poor \n", + "87ysunb 1971-12-27 US1HIKI0004 0.0 NaN poor \n", + " US1HIKI0014 0.0 NaN poor \n", + " US1HIKI0018 0.0 NaN poor \n", + " USC00510006 0.0 NaN poor " + ] + }, + "execution_count": 278, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Merge is the circle recorded data with the summary dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 279, + "metadata": {}, + "outputs": [], + "source": [ + "circle_obs = temp_df[['ui','circle_min_temp', 'circle_max_temp', 'circle_average_temp', 'Ecosys_station' ,'Nlcd_code_station']].groupby(['circle_id', 'count_date']).agg('first')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 280, + "metadata": {}, + "outputs": [], + "source": [ + "circle_data = circle_obs.join(circle_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 281, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uicircle_min_tempcircle_max_tempcircle_average_tempEcosys_stationNlcd_code_stationnumnum_notnanum_min_bw_noaanum_max_bw_noaanum_excellentnum_goodnum_fairnum_poorgoodness_modee_nume_num_notnae_num_min_bw_noaae_num_max_bw_noaae_num_excellente_num_goode_num_faire_num_poore_goodness_moden_numn_num_notnan_num_min_bw_noaan_num_max_bw_noaan_num_excellentn_num_goodn_num_fairn_num_poorn_goodness_modepe_pn_pgoodness_closestmax_temp_closestmax_temp_closest_valuemax_temp_closest_notnamin_temp_closestmin_temp_closest_valuemin_temp_closest_notnagoodness_lowestmax_temp_lowestmax_temp_lowest_valuemax_temp_lowest_notnamin_temp_lowestmin_temp_lowest_valuemin_temp_lowest_notnaconsensuse_consensusn_consensus
circle_idcount_date
87ymwsn1968-12-2622.0833-159.6667_196920.00000025.55555622.777778NaNNaN110.00.00001poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
87ysunb1971-12-2721.9667-159.4333_197223.33333325.55555624.444444NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1972-12-1721.9667-159.4333_197322.22222225.55555623.888889NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1973-12-1621.9667-159.4333_197417.22222227.77777822.500000NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1974-12-1521.9667-159.4333_197522.22222227.77777825.000000NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
\n", + "
" + ], + "text/plain": [ + " ui circle_min_temp circle_max_temp \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 22.0833-159.6667_1969 20.000000 25.555556 \n", + "87ysunb 1971-12-27 21.9667-159.4333_1972 23.333333 25.555556 \n", + " 1972-12-17 21.9667-159.4333_1973 22.222222 25.555556 \n", + " 1973-12-16 21.9667-159.4333_1974 17.222222 27.777778 \n", + " 1974-12-15 21.9667-159.4333_1975 22.222222 27.777778 \n", + "\n", + " circle_average_temp Ecosys_station Nlcd_code_station \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 22.777778 NaN NaN \n", + "87ysunb 1971-12-27 24.444444 NaN NaN \n", + " 1972-12-17 23.888889 NaN NaN \n", + " 1973-12-16 22.500000 NaN NaN \n", + " 1974-12-15 25.000000 NaN NaN \n", + "\n", + " num num_notna num_min_bw_noaa num_max_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 1 1 0.0 0.0 \n", + "87ysunb 1971-12-27 13 13 0.0 0.0 \n", + " 1972-12-17 13 13 0.0 0.0 \n", + " 1973-12-16 13 13 0.0 0.0 \n", + " 1974-12-15 13 13 0.0 0.0 \n", + "\n", + " num_excellent num_good num_fair num_poor \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 0 1 \n", + "87ysunb 1971-12-27 0 0 0 13 \n", + " 1972-12-17 0 0 0 13 \n", + " 1973-12-16 0 0 0 13 \n", + " 1974-12-15 0 0 0 13 \n", + "\n", + " goodness_mode e_num e_num_notna e_num_min_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 poor 0 0 NaN \n", + "87ysunb 1971-12-27 poor 0 0 NaN \n", + " 1972-12-17 poor 0 0 NaN \n", + " 1973-12-16 poor 0 0 NaN \n", + " 1974-12-15 poor 0 0 NaN \n", + "\n", + " e_num_max_bw_noaa e_num_excellent e_num_good \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0 0 \n", + "87ysunb 1971-12-27 NaN 0 0 \n", + " 1972-12-17 NaN 0 0 \n", + " 1973-12-16 NaN 0 0 \n", + " 1974-12-15 NaN 0 0 \n", + "\n", + " e_num_fair e_num_poor e_goodness_mode n_num \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 NaN 0 \n", + "87ysunb 1971-12-27 0 0 NaN 0 \n", + " 1972-12-17 0 0 NaN 0 \n", + " 1973-12-16 0 0 NaN 0 
\n", + " 1974-12-15 0 0 NaN 0 \n", + "\n", + " n_num_notna n_num_min_bw_noaa n_num_max_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 NaN NaN \n", + "87ysunb 1971-12-27 0 NaN NaN \n", + " 1972-12-17 0 NaN NaN \n", + " 1973-12-16 0 NaN NaN \n", + " 1974-12-15 0 NaN NaN \n", + "\n", + " n_num_excellent n_num_good n_num_fair n_num_poor \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 0 0 \n", + "87ysunb 1971-12-27 0 0 0 0 \n", + " 1972-12-17 0 0 0 0 \n", + " 1973-12-16 0 0 0 0 \n", + " 1974-12-15 0 0 0 0 \n", + "\n", + " n_goodness_mode p e_p n_p goodness_closest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0.0 NaN NaN poor \n", + "87ysunb 1971-12-27 NaN 0.0 NaN NaN poor \n", + " 1972-12-17 NaN 0.0 NaN NaN poor \n", + " 1973-12-16 NaN 0.0 NaN NaN poor \n", + " 1974-12-15 NaN 0.0 NaN NaN poor \n", + "\n", + " max_temp_closest max_temp_closest_value \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 False NaN \n", + "87ysunb 1971-12-27 False NaN \n", + " 1972-12-17 False NaN \n", + " 1973-12-16 False NaN \n", + " 1974-12-15 False NaN \n", + "\n", + " max_temp_closest_notna min_temp_closest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0.0 \n", + "87ysunb 1971-12-27 0.0 \n", + " 1972-12-17 0.0 \n", + " 1973-12-16 0.0 \n", + " 1974-12-15 0.0 \n", + "\n", + " min_temp_closest_value min_temp_closest_notna \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN \n", + "87ysunb 1971-12-27 NaN \n", + " 1972-12-17 NaN \n", + " 1973-12-16 NaN \n", + " 1974-12-15 NaN \n", + "\n", + " goodness_lowest max_temp_lowest max_temp_lowest_value \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 poor False NaN \n", + "87ysunb 1971-12-27 poor False NaN \n", + " 1972-12-17 poor False NaN \n", + " 1973-12-16 poor False NaN \n", + " 1974-12-15 poor False NaN \n", + "\n", + " max_temp_lowest_notna min_temp_lowest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 False 0 \n", + "87ysunb 1971-12-27 False 0 
\n", + " 1972-12-17 False 0 \n", + " 1973-12-16 False 0 \n", + " 1974-12-15 False 0 \n", + "\n", + " min_temp_lowest_value min_temp_lowest_notna consensus \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0 False \n", + "87ysunb 1971-12-27 NaN 0 False \n", + " 1972-12-17 NaN 0 False \n", + " 1973-12-16 NaN 0 False \n", + " 1974-12-15 NaN 0 False \n", + "\n", + " e_consensus n_consensus \n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 \n", + "87ysunb 1971-12-27 \n", + " 1972-12-17 \n", + " 1973-12-16 \n", + " 1974-12-15 " + ] + }, + "execution_count": 281, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circle_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 282, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "68390" + ] + }, + "execution_count": 282, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Count the number of circles we have \n", + "circle_data['ui'].nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The number of stations per circle" + ] + }, + { + "cell_type": "code", + "execution_count": 283, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 68390.000000\n", + "mean 6.025808\n", + "std 7.684488\n", + "min 1.000000\n", + "25% 1.000000\n", + "50% 3.000000\n", + "75% 8.000000\n", + "max 101.000000\n", + "Name: num, dtype: float64" + ] + }, + "execution_count": 283, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circle_data.num.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Missing Station and Circle Data" + ] + }, + { + "cell_type": "code", + "execution_count": 284, + "metadata": {}, + "outputs": [], + "source": [ + "# Create an list of indexes with missing min OR max temp\n", + "circle_data_na_circle_idx = (circle_data.circle_min_temp.isna() | circle_data.circle_max_temp.isna())\n" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 285, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles with missing Max or Min Temp Data: \n", + "10925\n", + "Circles with NO missing Temp Data: \n", + "57465\n" + ] + } + ], + "source": [ + "print(\"Circles with missing Max or Min Temp Data: \")\n", + "print(circle_data_na_circle_idx.sum())\n", + "print(\"Circles with NO missing Temp Data: \")\n", + "print(len(circle_data) - circle_data_na_circle_idx.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 286, + "metadata": {}, + "outputs": [], + "source": [ + "# Quick Look at Missing Max Temp\n", + "circle_data_maxna_circle_idx = circle_data.circle_max_temp.isna()" + ] + }, + { + "cell_type": "code", + "execution_count": 287, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles with missing MAX Temp Data: \n", + "10919\n", + "Circles with NO missing MAX Temp Data: \n", + "57471\n" + ] + } + ], + "source": [ + "print(\"Circles with missing MAX Temp Data: \")\n", + "print(circle_data_maxna_circle_idx.sum())\n", + "print(\"Circles with NO missing MAX Temp Data: \")\n", + "print(len(circle_data) - circle_data_maxna_circle_idx.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 288, + "metadata": {}, + "outputs": [], + "source": [ + "# Quick Look at Missing Min Temp\n", + "circle_data_minna_circle_idx = circle_data.circle_min_temp.isna()" + ] + }, + { + "cell_type": "code", + "execution_count": 289, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles with missing Min Temp Data: \n", + "10895\n", + "Circles with NO missing Min Temp Data: \n", + "57495\n" + ] + } + ], + "source": [ + "print(\"Circles with missing Min Temp Data: \")\n", + "print(circle_data_minna_circle_idx.sum())\n", + "print(\"Circles with NO missing Min Temp Data: \")\n", + "print(len(circle_data) - 
circle_data_minna_circle_idx.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 290, + "metadata": {}, + "outputs": [], + "source": [ + "# Quick Look at Missing BOTH min and max temp\n", + "circle_data_bothna_circle_idx = (circle_data.circle_min_temp.isna() & circle_data.circle_max_temp.isna())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 291, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles with missing Temp Data: \n", + "10889\n", + "Circles with NO missing Temp Data: \n", + "57501\n" + ] + } + ], + "source": [ + "print(\"Circles with missing Temp Data: \")\n", + "print(circle_data_bothna_circle_idx.sum())\n", + "print(\"Circles with NO missing Temp Data: \")\n", + "print(len(circle_data) - circle_data_bothna_circle_idx.sum())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Count and Percent of Stations Reporting if the Circle Reported Temp Data" + ] + }, + { + "cell_type": "code", + "execution_count": 292, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Stations with missing Temp Data IF the the Circle is Reporting:\n", + "0\n", + "0.0\n", + "Stations with NO missing Temp Data IF the Circle is Reporting: \n", + "57465\n", + "100.0\n" + ] + } + ], + "source": [ + "print(\"Stations with missing Temp Data IF the the Circle is Reporting:\")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.num_notna == 0)].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.num_notna == 0)].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n", + "print(\"Stations with NO missing Temp Data IF the Circle is Reporting: \")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.num_notna != 0)].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.num_notna != 0)].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 
100 )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Count and Percent of Closest Stations Reporting if the Circle Reported Temp Data" + ] + }, + { + "cell_type": "code", + "execution_count": 293, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Closest Stations with missing Max Temp Data IF the the Circle is Reporting:\n", + "0\n", + "0.0\n", + "Closest Stations with NO missing Max Temp Data IF the Circle is Reporting: \n", + "57465\n", + "100.0\n" + ] + } + ], + "source": [ + "print(\"Closest Stations with missing Max Temp Data IF the the Circle is Reporting:\")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.max_temp_closest.isna())].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.max_temp_closest.isna())].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n", + "print(\"Closest Stations with NO missing Max Temp Data IF the Circle is Reporting: \")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (~circle_data.max_temp_closest.isna())].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (~circle_data.max_temp_closest.isna())].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Count and Percent of Closest Elevation Stations Reporting if the Circle Reported Temp Data" + ] + }, + { + "cell_type": "code", + "execution_count": 294, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Closest Elivation Stations with missing Max Temp Data IF the the Circle is Reporting:\n", + "241\n", + "0.41938571304272165\n", + "Closest Elivation Stations with NO missing Max Temp Data IF the Circle is Reporting: \n", + "57224\n", + "99.58061428695729\n" + ] + } + ], + "source": [ + "print(\"Closest Elivation Stations with missing Max Temp Data IF the the 
Circle is Reporting:\")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.max_temp_lowest.isna())].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.max_temp_lowest.isna())].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n", + "print(\"Closest Elivation Stations with NO missing Max Temp Data IF the Circle is Reporting: \")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (~circle_data.max_temp_lowest.isna())].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (~circle_data.max_temp_lowest.isna())].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Circle vs. Station: The Final Determination on Whether Volunteers Are Reliable" + ] + }, + { + "cell_type": "code", + "execution_count": 295, + "metadata": {}, + "outputs": [], + "source": [ + "def crosstab_metrics(actl):\n", + " # accuracy\n", + " acc = actl.to_numpy().diagonal().sum() / actl.to_numpy().sum()\n", + " # precision\n", + " pr = actl.loc[1,1] / actl.to_numpy()[[1,0], [1,1]].sum()\n", + " # recall\n", + " re = actl.loc[1,1] / actl.to_numpy()[[1,1], [1,0]].sum()\n", + " # F1\n", + " f1 = 2 * pr * re / (pr + re)\n", + " \n", + " print(f\"accuracy: {acc*100:.2f}%\")\n", + " print(f\"precision: {pr*100:.2f}%\")\n", + " print(f\"recall: {re*100:.2f}%\")\n", + " print(f\"F1: {f1*100:.2f}%\")\n", + " \n", + "# return (acc, pr, re, f1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Based on All Reference Stations for a CBC Circle" + ] + }, + { + "cell_type": "code", + "execution_count": 297, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cicles where the circle Min temp is in bounds for NO references stations:\n", + "39160\n", + "0.572598333089633\n", + "Cicles where the circle Min temp is in bounds for 0 to 50% matching references stations\n", + "14132\n", + 
"20.663839742652435\n", + "Cicles where the circle Min temp is in bounds for 50% to All matching references stations\n", + "2502\n", + "3.658429594970025\n", + "Cicles where the circle Min temp is in bounds for ALL references stations: \n", + "1701\n", + "0.024872057318321393\n" + ] + } + ], + "source": [ + "# Check that the Minimum and Maximun Values are within the NOAA bounds\n", + "\n", + "print(\"Cicles where the circle Min temp is in bounds for NO references stations:\")\n", + "print(((circle_data.num_min_bw_noaa / circle_data.num) == 0).sum())\n", + "print(((circle_data.num_min_bw_noaa / circle_data.num) == 0).sum() / circle_data.shape[0] * 1)\n", + "print(\"Cicles where the circle Min temp is in bounds for 0 to 50% matching references stations\")\n", + "print((((circle_data.num_min_bw_noaa / circle_data.num) > 0) & ((circle_data.num_min_bw_noaa / circle_data.num) < .50)).sum())\n", + "print((((circle_data.num_min_bw_noaa / circle_data.num) > 0) & ((circle_data.num_min_bw_noaa / circle_data.num) < .50)).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle Min temp is in bounds for 50% to All matching references stations\")\n", + "print((((circle_data.num_min_bw_noaa / circle_data.num) < 1) & ((circle_data.num_min_bw_noaa / circle_data.num) >= .50)).sum())\n", + "print((((circle_data.num_min_bw_noaa / circle_data.num) < 1) & ((circle_data.num_min_bw_noaa / circle_data.num) >= .50)).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle Min temp is in bounds for ALL references stations: \")\n", + "print(((circle_data.num_min_bw_noaa / circle_data.num) == 1).sum())\n", + "print(((circle_data.num_min_bw_noaa / circle_data.num) == 1).sum() / circle_data.shape[0] * 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 300, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles where the circle max temp is in bounds for NO references stations:\n", + "40798\n", + 
"59.654920309986835\n", + "Cicles where the circle max temp is in bounds for 0 to 50% matching references stations\n", + "13008\n", + "19.020324608860946\n", + "Cicles where the circle max temp is in bounds for 50% to All matching references stations\n", + "2163\n", + "3.1627430910951895\n", + "Cicles where the circle max temp is in bounds for ALL references stations: \n", + "1502\n", + "2.1962275186430764\n" + ] + } + ], + "source": [ + "# Check that the maximum and Maximun Values are within the NOAA bounds\n", + "\n", + "print(\"Circles where the circle max temp is in bounds for NO references stations:\")\n", + "print(((circle_data.num_max_bw_noaa / circle_data.num) == 0).sum())\n", + "print(((circle_data.num_max_bw_noaa / circle_data.num) == 0).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle max temp is in bounds for 0 to 50% matching references stations\")\n", + "print((((circle_data.num_max_bw_noaa / circle_data.num) > 0) & ((circle_data.num_max_bw_noaa / circle_data.num) < .50)).sum())\n", + "print((((circle_data.num_max_bw_noaa / circle_data.num) > 0) & ((circle_data.num_max_bw_noaa / circle_data.num) < .50)).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle max temp is in bounds for 50% to All matching references stations\")\n", + "print((((circle_data.num_max_bw_noaa / circle_data.num) < 1) & ((circle_data.num_max_bw_noaa / circle_data.num) >= .50)).sum())\n", + "print((((circle_data.num_max_bw_noaa / circle_data.num) < 1) & ((circle_data.num_max_bw_noaa / circle_data.num) >= .50)).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle max temp is in bounds for ALL references stations: \")\n", + "print(((circle_data.num_max_bw_noaa / circle_data.num) == 1).sum())\n", + "print(((circle_data.num_max_bw_noaa / circle_data.num) == 1).sum() / circle_data.shape[0] * 100)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deep Dive on if Max Temperture Is In 
Bounds of Their Reference Stations" + ] + }, + { + "cell_type": "code", + "execution_count": 302, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uicircle_min_tempcircle_max_tempcircle_average_tempEcosys_stationNlcd_code_stationnumnum_notnanum_min_bw_noaanum_max_bw_noaanum_excellentnum_goodnum_fairnum_poorgoodness_modee_nume_num_notnae_num_min_bw_noaae_num_max_bw_noaae_num_excellente_num_goode_num_faire_num_poore_goodness_moden_numn_num_notnan_num_min_bw_noaan_num_max_bw_noaan_num_excellentn_num_goodn_num_fairn_num_poorn_goodness_modepe_pn_pgoodness_closestmax_temp_closestmax_temp_closest_valuemax_temp_closest_notnamin_temp_closestmin_temp_closest_valuemin_temp_closest_notnagoodness_lowestmax_temp_lowestmax_temp_lowest_valuemax_temp_lowest_notnamin_temp_lowestmin_temp_lowest_valuemin_temp_lowest_notnaconsensuse_consensusn_consensus
circle_idcount_date
87ymwsn1968-12-2622.0833-159.6667_196920.00000025.55555622.777778NaNNaN110.00.00001poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
87ysunb1971-12-2721.9667-159.4333_197223.33333325.55555624.444444NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1972-12-1721.9667-159.4333_197322.22222225.55555623.888889NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1973-12-1621.9667-159.4333_197417.22222227.77777822.500000NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1974-12-1521.9667-159.4333_197522.22222227.77777825.000000NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
\n", + "
" + ], + "text/plain": [ + " ui circle_min_temp circle_max_temp \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 22.0833-159.6667_1969 20.000000 25.555556 \n", + "87ysunb 1971-12-27 21.9667-159.4333_1972 23.333333 25.555556 \n", + " 1972-12-17 21.9667-159.4333_1973 22.222222 25.555556 \n", + " 1973-12-16 21.9667-159.4333_1974 17.222222 27.777778 \n", + " 1974-12-15 21.9667-159.4333_1975 22.222222 27.777778 \n", + "\n", + " circle_average_temp Ecosys_station Nlcd_code_station \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 22.777778 NaN NaN \n", + "87ysunb 1971-12-27 24.444444 NaN NaN \n", + " 1972-12-17 23.888889 NaN NaN \n", + " 1973-12-16 22.500000 NaN NaN \n", + " 1974-12-15 25.000000 NaN NaN \n", + "\n", + " num num_notna num_min_bw_noaa num_max_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 1 1 0.0 0.0 \n", + "87ysunb 1971-12-27 13 13 0.0 0.0 \n", + " 1972-12-17 13 13 0.0 0.0 \n", + " 1973-12-16 13 13 0.0 0.0 \n", + " 1974-12-15 13 13 0.0 0.0 \n", + "\n", + " num_excellent num_good num_fair num_poor \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 0 1 \n", + "87ysunb 1971-12-27 0 0 0 13 \n", + " 1972-12-17 0 0 0 13 \n", + " 1973-12-16 0 0 0 13 \n", + " 1974-12-15 0 0 0 13 \n", + "\n", + " goodness_mode e_num e_num_notna e_num_min_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 poor 0 0 NaN \n", + "87ysunb 1971-12-27 poor 0 0 NaN \n", + " 1972-12-17 poor 0 0 NaN \n", + " 1973-12-16 poor 0 0 NaN \n", + " 1974-12-15 poor 0 0 NaN \n", + "\n", + " e_num_max_bw_noaa e_num_excellent e_num_good \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0 0 \n", + "87ysunb 1971-12-27 NaN 0 0 \n", + " 1972-12-17 NaN 0 0 \n", + " 1973-12-16 NaN 0 0 \n", + " 1974-12-15 NaN 0 0 \n", + "\n", + " e_num_fair e_num_poor e_goodness_mode n_num \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 NaN 0 \n", + "87ysunb 1971-12-27 0 0 NaN 0 \n", + " 1972-12-17 0 0 NaN 0 \n", + " 1973-12-16 0 0 NaN 0 
\n", + " 1974-12-15 0 0 NaN 0 \n", + "\n", + " n_num_notna n_num_min_bw_noaa n_num_max_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 NaN NaN \n", + "87ysunb 1971-12-27 0 NaN NaN \n", + " 1972-12-17 0 NaN NaN \n", + " 1973-12-16 0 NaN NaN \n", + " 1974-12-15 0 NaN NaN \n", + "\n", + " n_num_excellent n_num_good n_num_fair n_num_poor \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 0 0 \n", + "87ysunb 1971-12-27 0 0 0 0 \n", + " 1972-12-17 0 0 0 0 \n", + " 1973-12-16 0 0 0 0 \n", + " 1974-12-15 0 0 0 0 \n", + "\n", + " n_goodness_mode p e_p n_p goodness_closest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0.0 NaN NaN poor \n", + "87ysunb 1971-12-27 NaN 0.0 NaN NaN poor \n", + " 1972-12-17 NaN 0.0 NaN NaN poor \n", + " 1973-12-16 NaN 0.0 NaN NaN poor \n", + " 1974-12-15 NaN 0.0 NaN NaN poor \n", + "\n", + " max_temp_closest max_temp_closest_value \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 False NaN \n", + "87ysunb 1971-12-27 False NaN \n", + " 1972-12-17 False NaN \n", + " 1973-12-16 False NaN \n", + " 1974-12-15 False NaN \n", + "\n", + " max_temp_closest_notna min_temp_closest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0.0 \n", + "87ysunb 1971-12-27 0.0 \n", + " 1972-12-17 0.0 \n", + " 1973-12-16 0.0 \n", + " 1974-12-15 0.0 \n", + "\n", + " min_temp_closest_value min_temp_closest_notna \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN \n", + "87ysunb 1971-12-27 NaN \n", + " 1972-12-17 NaN \n", + " 1973-12-16 NaN \n", + " 1974-12-15 NaN \n", + "\n", + " goodness_lowest max_temp_lowest max_temp_lowest_value \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 poor False NaN \n", + "87ysunb 1971-12-27 poor False NaN \n", + " 1972-12-17 poor False NaN \n", + " 1973-12-16 poor False NaN \n", + " 1974-12-15 poor False NaN \n", + "\n", + " max_temp_lowest_notna min_temp_lowest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 False 0 \n", + "87ysunb 1971-12-27 False 0 
\n", + " 1972-12-17 False 0 \n", + " 1973-12-16 False 0 \n", + " 1974-12-15 False 0 \n", + "\n", + " min_temp_lowest_value min_temp_lowest_notna consensus \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0 False \n", + "87ysunb 1971-12-27 NaN 0 False \n", + " 1972-12-17 NaN 0 False \n", + " 1973-12-16 NaN 0 False \n", + " 1974-12-15 NaN 0 False \n", + "\n", + " e_consensus n_consensus \n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 \n", + "87ysunb 1971-12-27 \n", + " 1972-12-17 \n", + " 1973-12-16 \n", + " 1974-12-15 " + ] + }, + "execution_count": 302, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circle_data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Removing rows without temperature data for either CBC Circles or NOAA stations." + "### Maximum Temperture In bounds of NOAA Stations" ] }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, + "execution_count": 303, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows before: 756378\n", - "Number of rows after: 61777\n", - "Total removed: 694601\n" + "Temp Data Consensus Amoung All Stations where the Circle Reported Max Temp was within bounds:\n", + "False 8235\n", + "NaN 4848\n", + "True 1367\n", + "Name: consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 56.989619\n", + "NaN 33.550173\n", + "True 9.460208\n", + "Name: consensus, dtype: float64\n", + "Temp Data Amoung Closest Stations where the Circle Reported Max Temp Between Bounds:\n", + "False 9908\n", + "True 4542\n", + "NaN 0\n", + "Name: max_temp_closest, dtype: Int64\n", + "^^ with Percentage:\n", + "False 68.567474\n", + "True 31.432526\n", + "NaN 0.000000\n", + "Name: max_temp_closest, dtype: float64\n", + "Snow Data Amoung Closest in Elivation Stations where the Circle Max Temp is within Bounds:\n", + "False 9845\n", + "True 4528\n", + "NaN 77\n", + "Name: 
max_temp_lowest, dtype: Int64\n", + "^^ with Percentage:\n", + "False 68.131488\n", + "True 31.335640\n", + "NaN 0.532872\n", + "Name: max_temp_lowest, dtype: float64\n", + "Snow Data Consensus Amoung Stations with the same nlcd code where the Circle Max Temp is within Bounds:\n", + "NaN 7609\n", + "False 4775\n", + "True 2066\n", + "Name: n_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "NaN 52.657439\n", + "False 33.044983\n", + "True 14.297578\n", + "Name: n_consensus, dtype: float64\n", + "Snow Data Consensus Amoung Stations with the same ecosys classification where the Circle Max Temp is within Bounds:\n", + "NaN 8114\n", + "False 4316\n", + "True 2020\n", + "Name: e_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "NaN 56.152249\n", + "False 29.868512\n", + "True 13.979239\n", + "Name: e_consensus, dtype: float64\n" ] } ], "source": [ - "temp_df.dropna(axis=0, subset=['circle_min_temp', 'circle_max_temp', 'noaa_min_temp', 'noaa_max_temp'], inplace=True)\n", - "print(f\"Number of rows before: {row_count}\")\n", - "print(f\"Number of rows after: {temp_df.shape[0]}\")\n", - "print(f\"Total removed: {row_count - temp_df.shape[0]}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "-----" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Out of Bounds Data " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Temperature Data\n", - "There are a number of outliers in the data set that could highly skew analysis. 
Any rows with a temperature outside of a min or max recorded temperature in the United States will be dropped.\n", + "print(\"Temp Data Consensus Amoung All Stations where the Circle Reported Max Temp was within bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", "\n", - "To be conservative in data dropping we'll only using on max and one min for the entire country rather than by state or other locality. Additionally we'll check by each min/max temp for circles and stations to get an idea on if one is more error prone than another.\n", + "print(\"Temp Data Amoung Closest Stations where the Circle Reported Max Temp Between Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].max_temp_closest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].max_temp_closest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", "\n", - "Data: https://en.wikipedia.org/wiki/U.S._state_and_territory_temperature_extremes" + "print(\"Snow Data Amoung Closest in Elivation Stations where the Circle Max Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].max_temp_lowest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].max_temp_lowest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Consensus Amoung Stations with the same nlcd code where the Circle Max Temp is within Bounds:\")\n", + 
"print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].n_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].n_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Consensus Amoung Stations with the same ecosys classification where the Circle Max Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].e_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].e_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, + "execution_count": 304, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of CBC measurments outside max : 3\n", - "Number of NOAA measurments outside max : 14207\n", - "\n", - "Number of CBC measurments outside min : 0\n", - "Number of NOAA measurments outside min : 17098\n", - "\n", - "Number of NOAA stations with both outside : 125\n" + "Temp Data Consensus Amoung All Stations where the Circle did NOT Report Max Temp within Bounds:\n", + "False 40798\n", + "NaN 0\n", + "Name: consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 100.0\n", + "NaN 0.0\n", + "Name: consensus, dtype: float64\n", + "Temp Data Amoung Closest Stations where the Circle did NOT Report Max Temp within Bounds:\n", + "False 40798\n", + "NaN 0\n", + "Name: max_temp_closest, dtype: Int64\n", + "^^ with Percentage:\n", + "False 100.0\n", + "NaN 0.0\n", + "Name: max_temp_closest, dtype: float64\n", + "Temp Data Amoung Closest in Elivation Stations where the Circle did NOT Report Max Temp within Bounds:\n", + "False 40639\n", + "NaN 159\n", + "Name: 
max_temp_lowest, dtype: Int64\n", + "^^ with Percentage:\n", + "False 99.610275\n", + "NaN 0.389725\n", + "Name: max_temp_lowest, dtype: float64\n", + "Temp Data Consensus Amoung Stations with the same nlcd code where the Circle did NOT Report Max Temp within Bounds:\n", + "False 24082\n", + "NaN 16716\n", + "Name: n_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 59.027403\n", + "NaN 40.972597\n", + "Name: n_consensus, dtype: float64\n", + "Temp Data Consensus Amoung Stations with the same ecosys classification where the Circle did NOT Report Max Temp within Bounds:\n", + "NaN 21107\n", + "False 19691\n", + "Name: e_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "NaN 51.735379\n", + "False 48.264621\n", + "Name: e_consensus, dtype: float64\n" ] } ], "source": [ - "# Creating variables for each drop condition\n", - "circle_over_max_temp = temp_df.loc[temp_df[\"circle_max_temp\"]>max_temp_check]\n", - "circle_under_min_temp = temp_df.loc[temp_df[\"circle_min_temp\"]max_temp_check]\n", - "noaa_under_min_temp = temp_df.loc[temp_df[\"noaa_min_temp\"] max_temp_check) & (temp_df[\"noaa_min_temp\"] < min_temp_check)].shape[0]}')\n", + "print(\"Temp Data Amoung Closest in Elivation Stations where the Circle did NOT Report Max Temp within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == False].max_temp_lowest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == False].max_temp_lowest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == False]) * 100))\n", "\n", - "# Setting list of indices to drop\n", - "index_drop_list = list(circle_over_max_temp.index) + list(circle_under_min_temp.index) + list(noaa_over_max_temp.index) + list(noaa_under_min_temp.index)\n", + "print(\"Temp Data Consensus Amoung Stations with the same nlcd code where the Circle did NOT Report Max Temp within Bounds:\")\n", + 
"print(circle_data.loc[circle_data['num_max_bw_noaa'] == False].n_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == False].n_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == False]) * 100))\n", "\n", - "# Dropping All out of bout roundsRows\n", - "temp_df.drop(index_drop_list, inplace=True)" + "print(\"Temp Data Consensus Amoung Stations with the same ecosys classification where the Circle did NOT Report Max Temp within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == False].e_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == False].e_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == False]) * 100))\n", + "\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Distance Data" + "### Minimum Temperture In bounds of NOAA Stations" ] }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, + "execution_count": 305, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows dropped outside of distance threshold: 23364\n" + "Temp Data Consensus Amoung All Stations where the Circle Reported Min Temp was within bounds:\n", + "False 11839\n", + "NaN 3019\n", + "True 858\n", + "Name: consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 75.330873\n", + "NaN 19.209723\n", + "True 5.459404\n", + "Name: consensus, dtype: float64\n", + "Temp Data Amoung Closest Stations where the Circle Reported Min Temp Between Bounds:\n", + "0.0 10748\n", + "1.0 4968\n", + "Name: min_temp_closest, dtype: int64\n", + "^^ with Percentage:\n", + "0.0 68.388903\n", + "1.0 31.611097\n", + "Name: min_temp_closest, dtype: float64\n", + "Snow Data Amoung Closest in Elivation Stations where the 
Circle Min Temp is within Bounds:\n", + "0.0 10659\n", + "1.0 4969\n", + "NaN 88\n", + "Name: min_temp_lowest, dtype: int64\n", + "^^ with Percentage:\n", + "0.0 67.822601\n", + "1.0 31.617460\n", + "NaN 0.559939\n", + "Name: min_temp_lowest, dtype: float64\n", + "Snow Data Consensus Amoung Stations with the same nlcd code where the Circle Min Temp is within Bounds:\n", + "False 7547\n", + "NaN 6872\n", + "True 1297\n", + "Name: n_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 48.021125\n", + "NaN 43.726139\n", + "True 8.252736\n", + "Name: n_consensus, dtype: float64\n", + "Snow Data Consensus Amoung Stations with the same ecosys classification where the Circle Min Temp is within Bounds:\n", + "NaN 7877\n", + "False 6587\n", + "True 1252\n", + "Name: e_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "NaN 50.120896\n", + "False 41.912700\n", + "True 7.966404\n", + "Name: e_consensus, dtype: float64\n" ] } ], "source": [ - "# Dropping rows with distance differences larger then set threshold\n", - "temp_df.drop(temp_df[temp_df['distance_diff'] > distance_threshold].index, inplace=True)\n", - "print(f'Number of rows dropped outside of distance threshold: {temp_df.shape[0]}')" + "print(\"Temp Data Consensus Amoung All Stations where the Circle Reported Min Temp was within bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == True].consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Temp Data Amoung Closest Stations where the Circle Reported Min Temp Between Bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].min_temp_closest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == 
True].min_temp_closest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Amoung Closest in Elivation Stations where the Circle Min Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].min_temp_lowest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == True].min_temp_lowest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Consensus Amoung Stations with the same nlcd code where the Circle Min Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].n_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == True].n_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Consensus Amoung Stations with the same ecosys classification where the Circle Min Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].e_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == True].e_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Elevation Data" + "## Population Comparison \n", + "Compare the populations of the Max Temps reported by the CBC circles and the Max Temps recorded by the NOAA stations " ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 313, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows dropped outside of elevation threshold: 16986\n" + 
"The number of cases for CBC Max Temps is: 68390\n", + "The number of cases for NOAA Max Temps is: 68390\n" ] } ], "source": [ - "# Dropping rows with circles and stations that are over the elevation threshold\n", - "temp_df.drop(temp_df[temp_df['elevation_diff'] > elevation_threshold].index, inplace=True)\n", + "# Set up the two populations\n", + "cbc_max_temps = circle_data['circle_max_temp']\n", + "noaa_max_temps = circle_data['max_temp_closest_value']\n", "\n", - "# Dropping rows with no elevation data\n", - "temp_df.dropna(subset=['circle_elevation', 'noaa_elevation'], inplace=True)\n", - "print(f'Number of rows dropped outside of elevation threshold: {temp_df.shape[0]}')" + "print(\"The number of cases for CBC Max Temps is: \" + str(cbc_max_temps.shape[0]))\n", + "print(\"The number of cases for NOAA Max Temps is: \" + str(noaa_max_temps.shape[0]))" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 314, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of cases for CBC Max Temps is: 57471\n", + "The number of cases for NOAA Max Temps is: 18466\n" + ] + } + ], "source": [ - "#### Checking to see how many CBC Circle temperatures records are within the bounds of the NOAA Station records" + "# drop the Nan Values \n", + "cbc_max_temps = cbc_max_temps.dropna()\n", + "noaa_max_temps = noaa_max_temps.dropna()\n", + "\n", + "print(\"The number of cases for CBC Max Temps is: \" + str(cbc_max_temps.shape[0]))\n", + "print(\"The number of cases for NOAA Max Temps is: \" + str(noaa_max_temps.shape[0]))" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 315, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 57471.000000\n", + "mean 5.589304\n", + "std 9.684150\n", + "min -45.000000\n", + "25% -0.555556\n", + "50% 4.444444\n", + "75% 12.222222\n", + "max 56.000000\n", + "Name: circle_max_temp, dtype: float64" + ] + }, + 
"execution_count": 315, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "temp_df['temp_check'] = temp_df['circle_average_temp'].between(temp_df['noaa_min_temp'], temp_df['noaa_max_temp'])" + "cbc_max_temps.describe()" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 316, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of CBC Cirlcs who's temperature is in the bounds of the corresponding NOAA station: 10544\n", - "Number of CBC Cirlcs who's temperature is not in the bounds of the corresponding NOAA station: 6442\n", - "\n", - "62% of stations lay between\n" - ] + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 316, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD4CAYAAADo30HgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAUtklEQVR4nO3df4xdZ33n8fenSRNSWnAg3VlkR2tLWFQBFzU7CqkiVSNSJU6CcP6gKCgCh3prVRtYumsJHPgjEhApqJumoBZWFklrqoiQTaliNSnBGzKqkDYmBCghCWlGwRBbCaE4CTXZwk72u3/cx+19jJ2x7x3PtWfeL2k05zznOfc8X58Zf+b8uOemqpAk6ZBfmvQAJEknF4NBktQxGCRJHYNBktQxGCRJndMnPYBRnXPOObV27dol3+5Pf/pTXvnKVy75didlpdULK69m613eDq/3oYce+qeq+vWXW+eUDYa1a9fy9a9/fcm3Ozs7y8zMzJJvd1JWWr2w8mq23uXt8HqTfH+hdTyVJEnqGAySpI7BIEnqGAySpI7BIEnqGAySpI7BIEnqGAySpI7BIEnqnLLvfJZOpLXb7x553b03XrGII5GWnkcMkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqTOgsGQ5NYkzyb5zlDbHyf5bpJvJ/mbJKuGll2XZC7J40kuHWrf2Nrmkmwfal+XZE9r/0KSMxazQEnS8TmWI4a/BDYe1rYbeFNV/Sbwj8B1AEnOA64C3tjW+XSS05KcBvw5cBlwHvCu1hfgE8DNVfV64Dlgy1gVSZLGsmAwVNXfAwcOa/tyVc232QeANW16E3B7Vf2sqr4HzAEXtK+5qnqyqn4O3A5sShLgrcCdbf2dwJVj1iRJGsNifFDP7wNfaNOrGQTFIftaG8BTh7W/BXgt8PxQyAz3/wVJtgJbAaamppidnR137Mft4MGDE9nupKy0emFQ87YNL428/qn277XS9rH1LmysYEjyEWAeuG2c1zlWVbUD2AEwPT1dMzMzS7HZzuzsLJPY7qSstHphUPNNX/3pyOvvvXpm8QazBFbaPrbehY0cDEmuAd4GXFxV1Zr3A+cOdVvT2jhK+4+BVU
lOb0cNw/0lSRMw0u2qSTYCHwTeXlUvDi3aBVyV5Mwk64D1wNeAB4H17Q6kMxhcoN7VAuV+4B1t/c3AXaOVIklaDMdyu+rngf8NvCHJviRbgD8Dfg3YneRbSf4HQFU9AtwBPAp8Cbi2ql5qRwPvA+4FHgPuaH0BPgT8tyRzDK453LKoFUqSjsuCp5Kq6l1HaD7qf95VdQNwwxHa7wHuOUL7kwzuWpIknQR857MkqWMwSJI6BoMkqWMwSJI6BoMkqWMwSJI6BoMkqWMwSJI6BoMkqWMwSJI6BoMkqbMYH9Qjacja7XePvO7eG69YxJFIo/GIQZLUMRgkSR2DQZLUMRgkSR2DQZLUMRgkSR2DQZLUMRgkSR2DQZLUMRgkSZ0FgyHJrUmeTfKdobbXJNmd5In2/ezWniSfSjKX5NtJzh9aZ3Pr/0SSzUPt/zHJw22dTyXJYhcpSTp2x3LE8JfAxsPatgP3VdV64L42D3AZsL59bQU+A4MgAa4H3gJcAFx/KExanz8YWu/wbUmSltCCwVBVfw8cOKx5E7CzTe8Erhxq/1wNPACsSvI64FJgd1UdqKrngN3AxrbsVVX1QFUV8Lmh15IkTcCoT1edqqqn2/QzwFSbXg08NdRvX2t7ufZ9R2g/oiRbGRyJMDU1xezs7IjDH93Bgwcnst1JWWn1wqDmbRtemsi2/Zk+8ax3YWM/druqKkmN+zrHuK0dwA6A6enpmpmZWYrNdmZnZ5nEdidlpdULg5pv+upPJ7LtvVfPLPk2V9o+tt6FjXpX0g/baSDa92db+37g3KF+a1rby7WvOUK7JGlCRg2GXcChO4s2A3cNtb+n3Z10IfBCO+V0L3BJkrPbRedLgHvbsp8kubDdjfSeodeSJE3AgqeSknwemAHOSbKPwd1FNwJ3JNkCfB94Z+t+D3A5MAe8CLwXoKoOJPkY8GDr99GqOnRB+z8zuPPpLODv2pckaUIWDIaqetdRFl18hL4FXHuU17kVuPUI7V8H3rTQOCRJS8N3PkuSOgaDJKljMEiSOmO/j0E6Wa3dfvdI623bMI+/GlrJPGKQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx88vlE4io34c6SF7b7xikUailWysI4Yk/zXJI0m+k+TzSV6RZF2SPUnmknwhyRmt75ltfq4tXzv0Ote19seTXDpeSZKkcYwcDElWA/8FmK6qNwGnAVcBnwBurqrXA88BW9oqW4DnWvvNrR9JzmvrvRHYCHw6yWmjjkuSNJ5xrzGcDpyV5HTgV4CngbcCd7blO4Er2/SmNk9bfnGStPbbq+pnVfU9YA64YMxxSZJGNPI1hqran+S/Az8A/g/wZeAh4Pmqmm/d9gGr2/Rq4Km27nySF4DXtvYHhl56eJ1Okq3AVoCpqSlmZ2dHHf7IDh48OJHtTsqpXO+2DfMLdzqCqbNGX3fSRtlXp/I+HoX1LmzkYEhyNoO/9tcBzwP/k8GpoBOmqnYAOwCmp6drZmbmRG7uiGZnZ5nEdiflVK73mhEv5G7bMM9ND5+a92XsvXrmuNc5lffxKKx3YeOcSvpd4HtV9aOq+r/AF4GLgFXt1BLAGmB/m94PnAvQlr8a+PFw+xHWkSQtsXGC4QfAhUl+pV0ruBh4FLgfeEfrsxm4q03vavO05V+pqmrtV7W7ltYB64GvjTEuSdIYxrnGsCfJncA3gHngmwxO89wN3J7k463tlrbKLcBfJZkDDjC4E4mqeiTJHQxCZR64tqpeGnVckqTxjHUitaquB64/rPlJjnBXUVX9C/B7R3mdG4AbxhmLJGlx+EgMSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQ
wGSVLHYJAkdQwGSVJnrGBIsirJnUm+m+SxJL+d5DVJdid5on0/u/VNkk8lmUvy7STnD73O5tb/iSSbxy1KkjS6cY8YPgl8qap+A3gz8BiwHbivqtYD97V5gMuA9e1rK/AZgCSvAa4H3gJcAFx/KEwkSUtv5GBI8mrgd4BbAKrq51X1PLAJ2Nm67QSubNObgM/VwAPAqiSvAy4FdlfVgap6DtgNbBx1XJKk8Zw+xrrrgB8Bf5HkzcBDwAeAqap6uvV5Bphq06uBp4bW39fajtb+C5JsZXC0wdTUFLOzs2MMfzQHDx6cyHYn5VSud9uG+ZHWmzpr9HUnbZR9dSrv41FY78LGCYbTgfOB91fVniSf5N9OGwFQVZWkxthGp6p2ADsApqena2ZmZrFe+pjNzs4yie1Oyqlc7zXb7x5pvW0b5rnp4XF+NSZn79Uzx73OqbyPR2G9CxvnGsM+YF9V7WnzdzIIih+2U0S078+25fuBc4fWX9PajtYuSZqAkYOhqp4BnkryhtZ0MfAosAs4dGfRZuCuNr0LeE+7O+lC4IV2yule4JIkZ7eLzpe0NknSBIx7vPx+4LYkZwBPAu9lEDZ3JNkCfB94Z+t7D3A5MAe82PpSVQeSfAx4sPX7aFUdGHNckqQRjRUMVfUtYPoIiy4+Qt8Crj3K69wK3DrOWCRJi8N3PkuSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOmN95rN0Iq3dfvekhyCtSB4xSJI6HjFIy8goR1nbNsxzzfa72XvjFSdgRDoVjX3EkOS0JN9M8rdtfl2SPUnmknwhyRmt/cw2P9eWrx16jeta++NJLh13TJKk0S3GqaQPAI8NzX8CuLmqXg88B2xp7VuA51r7za0fSc4DrgLeCGwEPp3ktEUYlyRpBGMFQ5I1wBXAZ9t8gLcCd7YuO4Er2/SmNk9bfnHrvwm4vap+VlXfA+aAC8YZlyRpdONeY/hT4IPAr7X51wLPV9V8m98HrG7Tq4GnAKpqPskLrf9q4IGh1xxep5NkK7AVYGpqitnZ2TGHf/wOHjw4ke1OyiTr3bZhfuFOJ8DUWZPb9iQcqnel/Fz7O7ywkYMhyduAZ6vqoSQzo77O8aiqHcAOgOnp6ZqZWZLNdmZnZ5nEdidlkvVeM6HbVbdtmOemh1fOfRmH6t179cykh7Ik/B1e2Dg//RcBb09yOfAK4FXAJ4FVSU5vRw1rgP2t/37gXGBfktOBVwM/Hmo/ZHgdSdISG/kaQ1VdV1Vrqmotg4vHX6mqq4H7gXe0bpuBu9r0rjZPW/6VqqrWflW7a2kdsB742qjjkiSN50QcL38IuD3Jx4FvAre09luAv0oyBxxgECZU1SNJ7gAeBeaBa6vqpRMwLknSMViUYKiqWWC2TT/JEe4qqqp/AX7vKOvfANywGGORJI3HR2JIkjoGgySpYzBIkjoGgySpYzBIkjoGgySpYzBIkjoGgySps3KeFCbpZY3zGdt++tvy4hGDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOiMHQ5Jzk9yf5NEkjyT5QGt/TZLdSZ5o389u7UnyqSRzSb6d5Pyh19rc+j+RZPP4ZUmSRjXOEcM8sK2qzgMuBK5Nch6wHbivqtYD97V5gMuA9e1rK/AZGAQJcD3wFuAC4PpDYSJJWnojB0NVPV1V32jT/ww8BqwGNgE7W7edwJVtehPwuRp4AFiV5HXApcDuqjpQVc8Bu4GNo45LkjSeRfkEtyRrgd8C9gBTVfV0W/QMMNWmVwNPDa22r7Udrf1I29nK4GiDqakpZmdnF2P4x+XgwYMT2e6kTLLebRvmJ7LdqbMmt+1JWIx6T6XfCX+HFzZ2MCT5VeCvgT+qqp8k+ddlVV
VJatxtDL3eDmAHwPT0dM3MzCzWSx+z2dlZJrHdSZlkvdeM8VGT49i2YZ6bHl45n3q7GPXuvXpmcQazBPwdXthYdyUl+WUGoXBbVX2xNf+wnSKifX+2te8Hzh1afU1rO1q7JGkCxrkrKcAtwGNV9SdDi3YBh+4s2gzcNdT+nnZ30oXAC+2U073AJUnObhedL2ltkqQJGOf48SLg3cDDSb7V2j4M3AjckWQL8H3gnW3ZPcDlwBzwIvBegKo6kORjwIOt30er6sAY45IkjWHkYKiqrwI5yuKLj9C/gGuP8lq3AreOOhadvNZO6DqBpNH5zmdJUsdgkCR1DAZJUsdgkCR1DAZJUsdgkCR1DAZJUsdgkCR1Vs6TwiSdMOO8kXHvjVcs4ki0GDxikCR1DAZJUsdgkCR1DAZJUsdgkCR1vCtJC/LR2dLK4hGDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKnj+xhWiFHfi7Btwzz+mEgry0lzxJBkY5LHk8wl2T7p8UjSSnVSBEOS04A/By4DzgPeleS8yY5Kklamk+UcwQXAXFU9CZDkdmAT8OhER7XIfLSE9Iv8kJ+TT6pq0mMgyTuAjVX1n9r8u4G3VNX7Duu3FdjaZt8APL6kAx04B/inCWx3UlZavbDyarbe5e3wev9DVf36y61wshwxHJOq2gHsmOQYkny9qqYnOYaltNLqhZVXs/Uub6PUe1JcYwD2A+cOza9pbZKkJXayBMODwPok65KcAVwF7JrwmCRpRTopTiVV1XyS9wH3AqcBt1bVIxMe1tFM9FTWBKy0emHl1Wy9y9tx13tSXHyWJJ08TpZTSZKkk4TBIEnqGAzHKcm2JJXknDafJJ9qj/L4dpLzJz3GxZDkj5N8t9X0N0lWDS27rtX7eJJLJznOxbTcH8uS5Nwk9yd5NMkjST7Q2l+TZHeSJ9r3syc91sWU5LQk30zyt21+XZI9bT9/od3wsmwkWZXkzvb7+1iS3z7efWwwHIck5wKXAD8Yar4MWN++tgKfmcDQToTdwJuq6jeBfwSuA2iPKrkKeCOwEfh0e6TJKW2FPJZlHthWVecBFwLXthq3A/dV1Xrgvja/nHwAeGxo/hPAzVX1euA5YMtERnXifBL4UlX9BvBmBrUf1z42GI7PzcAHgeEr9puAz9XAA8CqJK+byOgWUVV9uarm2+wDDN5bAoN6b6+qn1XV94A5Bo80OdX962NZqurnwKHHsiwbVfV0VX2jTf8zg/8wVjOoc2frthO4cjIjXHxJ1gBXAJ9t8wHeCtzZuiy3el8N/A5wC0BV/byqnuc497HBcIySbAL2V9U/HLZoNfDU0Py+1rac/D7wd216uda7XOs6oiRrgd8C9gBTVfV0W/QMMDWhYZ0If8rgj7n/1+ZfCzw/9EfPctvP64AfAX/RTp99NskrOc59fFK8j+FkkeR/Af/+CIs+AnyYwWmkZePl6q2qu1qfjzA4BXHbUo5NJ06SXwX+GvijqvrJ4I/ogaqqJMviHvYkbwOeraqHksxMejxL5HTgfOD9VbUnySc57LTRsexjg2FIVf3ukdqTbGCQxP/QfonWAN9IcgGn8OM8jlbvIUmuAd4GXFz/9oaXU7beBSzXujpJfplBKNxWVV9szT9M8rqqerqdBn12ciNcVBcBb09yOfAK4FUMzr+vSnJ6O2pYbvt5H7Cvqva0+TsZBMNx7WNPJR2Dqnq4qv5dVa2tqrUM/vHPr6pnGDy64z3t7qQLgReGDtlOWUk2MjgEf3tVvTi0aBdwVZIzk6xjcNH9a5MY4yJb9o9laefXbwEeq6o/GVq0C9jcpjcDdy312E6Eqrquqta039mrgK9U1dXA/cA7WrdlUy9A+z/pqSRvaE0XM/j4guPaxx4xjO8e4HIGF2FfBN472eEsmj8DzgR2t6OkB6rqD6vqkSR3MPhhmweuraqXJjjORXGKPZZlVBcB7wYeTvKt1vZh4EbgjiRbgO8D75zQ+JbKh4
Dbk3wc+CbtQu0y8n7gtvYHzpMM/k/6JY5jH/tIDElSx1NJkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqTO/wfM5LZby84grAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" } ], "source": [ - "# Counting number of circles that are true\n", - "temp_true = sum(temp_df['temp_check'])\n", - "temp_false = temp_df.shape[0] - sum(temp_df['temp_check'])\n", - "print(f\"Number of CBC Cirlcs who's temperature is in the bounds of the corresponding NOAA station: {sum(temp_df['temp_check'])}\")\n", - "print(f\"Number of CBC Cirlcs who's temperature is not in the bounds of the corresponding NOAA station: {temp_df.shape[0] - sum(temp_df['temp_check'])}\")\n", - "print()\n", - "print(f\"{round((temp_true/temp_df.shape[0])*100)}% of stations lay between\")" + "cbc_max_temps.hist(bins =20)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 317, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 18466.000000\n", + "mean 5.931729\n", + "std 8.994429\n", + "min -35.600000\n", + "25% 0.000000\n", + "50% 5.000000\n", + "75% 11.700000\n", + "max 33.900000\n", + "Name: max_temp_closest_value, dtype: float64" + ] + }, + "execution_count": 317, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "---\n", - "## Temperature Measurement Goodness\n", - "\n", - "temp_metric = sqrt( (noaa_min_temp - circle_min_temp)^2 + (noaa_max_temp - circle_max_temp)^2 )" + "noaa_max_temps.describe()" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 318, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 318, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAWxElEQVR4nO3dfWxd9X3H8feHECDCHQmDXaUhmlM1W0fJGsBKqDpNNqwQwrRQqa2CEA2Uyt1EtlaKNkJRC4UypWspEiqlcpWMsLK6GQXhQRhNUzzEH4GQNsR5KMOFMGKFRG1CigvLZvbdH/dndjF+uL6+T87v85KOfM73PNzvub7++Nxzz71XEYGZmeXhpEY3YGZm9ePQNzPLiEPfzCwjDn0zs4w49M3MMnJyoxsYz1lnnRWtra2NbmNMv/3tbzn99NMb3UbZ3G9tTad+p1Ov4H4na8eOHb+KiLNHm9fUod/a2spzzz3X6DbG1NvbS3t7e6PbKJv7ra3p1O906hXc72RJemWseT69Y2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZWTC0Jd0mqRnJT0vaY+kr6b6fZJelrQzDYtTXZLultQvaZekC0q2tUrSi2lYVbvdMjOz0ZTzjtzjwMURMShpJvC0pMfTvL+NiAdHLH85sDANS4F7gaWSzgRuAdqAAHZI6omIo9XYEbPppHXtYxWvu3/dFVXsxHIz4ZF+FA2myZlpGO/rtlYA96f1tgGzJc0FLgO2RMSRFPRbgGVTa9/MzCZD5XxdoqQZwA7gg8A9EXGjpPuAj1J8JrAVWBsRxyU9CqyLiKfTuluBG4F24LSI+Fqqfxl4KyK+OeK2OoFOgEKhcGF3d3c19rMmBgcHaWlpaXQbZXO/tTWZfvsGjlV8O4vmnVHxusNO5Pu2GTS6346Ojh0R0TbavLI+cC0i3gYWS5oNPCzpPOAm4DXgFKCLYrDfNtVmI6IrbY+2trZo5g9ZavSHKk2W+62tyfR77VRO71xd3m2M50S+b5tBM/c7qat3IuJ14ElgWUQcTKdwjgP/CCxJiw0A80tWOyfVxqqbmVmdlHP1ztnpCB9Js4CPA79I5+mRJOBKYHdapQf4TLqK5yLgWEQcBJ4ALpU0R9Ic4NJUMzOzOinn9M5cYGM6r38SsCkiHpX0U0lnAwJ2An+Zlt8MLAf6gTeB6wAi4oik24HtabnbIuJI9XbFzMwmMmHoR8Qu4PxR6hePsXwAN4wxbwOwYZI9mplZlfgduWZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYmDH1Jp0l6VtLzkvZI+mqqL5D0jKR+ST+UdEqqn5qm+9P81pJt3ZTqL0i6rFY7ZWZmoyvnSP84cHFEfARYDCyTdBHwdeCuiPggcBS4Pi1/PXA01e9KyyHpXGAl8GFgGfAdSTOquTNmZja+CUM/igbT5Mw0BHAx8GCqbwSuTOMr0jRp/iWSlOrdEXE8Il4G+oElVdkLMzMriyJi4oWKR+Q7gA8C9wDfALalo3kkzQcej4jzJO0GlkXEgTTvl8BS4Na0zvdTfX1a58ERt9UJdAIUCoULu7u7q7GfNTE4OEhLS0uj2yib+62tyfTbN3Cs4ttZNO+MitcddiLft82g0f12dHTsiIi20eadXM4GIuJtYLGk2cDDwIeq2N/I2+oCugDa2tqivb29Vjc1Zb29vTRzfyO539qaTL/Xrn2s4tvZf3V5tzGeE/m+bQbN3O+krt6JiNeBJ4GPArMlDf/TOAcYSOMDwHyANP8M4Nel9VHWMTOzOijn6p2z0xE+kmYBHwf2UQz/T6bFVgGPpPGeNE2a/9MonkPqAVamq3sWAAuBZ6u1I2ZmNrFyTu/
MBTam8/onAZsi4lFJe4FuSV8Dfg6sT8uvB/5JUj9whOIVO0TEHkmbgL3AEHBDOm1kZmZ1MmHoR8Qu4PxR6i8xytU3EfFfwKfG2NYdwB2Tb9PMzKrB78g1M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8tIWe/INbP3ah3xrto1i4am9E5bs3rwkb6ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGZkw9CXNl/SkpL2S9kj6QqrfKmlA0s40LC9Z5yZJ/ZJekHRZSX1ZqvVLWlubXTIzs7GU83n6Q8CaiPiZpPcBOyRtSfPuiohvli4s6VxgJfBh4P3ATyT9QZp9D/Bx4ACwXVJPROytxo6YmdnEJgz9iDgIHEzjb0jaB8wbZ5UVQHdEHAdeltQPLEnz+iPiJQBJ3WlZh76ZWZ1M6py+pFbgfOCZVFotaZekDZLmpNo84NWS1Q6k2lh1MzOrE0VEeQtKLcC/A3dExEOSCsCvgABuB+ZGxGclfRvYFhHfT+utBx5Pm1kWEZ9L9WuApRGxesTtdAKdAIVC4cLu7u6p7mPNDA4O0tLS0ug2yuZ+q6tv4Ni7pguz4NBbtb/dRfPOmPI2mv2+Hcn9Tk5HR8eOiGgbbV5Z35EraSbwI+CBiHgIICIOlcz/HvBomhwA5pesfk6qMU79HRHRBXQBtLW1RXt7ezktNkRvby/N3N9I7re6Rn4f7ppFQ9zZV/uvnd5/dfuUt9Hs9+1I7rd6yrl6R8B6YF9EfKukPrdksU8Au9N4D7BS0qmSFgALgWeB7cBCSQsknULxxd6e6uyGmZmVo5zDko8B1wB9knam2peAqyQtpnh6Zz/weYCI2CNpE8UXaIeAGyLibQBJq4EngBnAhojYU8V9MTOzCZRz9c7TgEaZtXmcde4A7hilvnm89czMrLb8jlwzs4w49M3MMlL7Sw3MrKpaR1w1NFn7111RpU5sOvKRvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZmTD0Jc2X9KSkvZL2SPpCqp8paYukF9PPOakuSXdL6pe0S9IFJdtalZZ/UdKq2u2WmZmNppwj/SFgTUScC1wE3CDpXGAtsDUiFgJb0zTA5cDCNHQC90LxnwRwC7AUWALcMvyPwszM6mPC0I+IgxHxszT+BrAPmAesADamxTYCV6bxFcD9UbQNmC1pLnAZsCUijkTEUWALsKyqe2NmZuNSRJS/sNQKPAWcB/xnRMxOdQFHI2K2pEeBdRHxdJq3FbgRaAdOi4ivpfqXgbci4psjbqOT4jMECoXChd3d3VPZv5oaHBykpaWl0W2Uzf1WV9/AsXdNF2bBobca1MwkLJp3RtPftyO538np6OjYERFto807udyNSGoBfgR8MSJ+U8z5oogISeX/9xhHRHQBXQBtbW3R3t5ejc3WRG9vL83c30jut7quXfvYu6bXLBrizr6y/6QaZv/V7U1/347kfqunrKt3JM2kGPgPRMRDqXwonbYh/Tyc6gPA/JLVz0m1sepmZlYnEx6WpFM364F9EfGtklk9wCpgXfr5SEl9taRuii/aHouIg5KeAP6+5MXbS4GbqrMbZlau1rWPsWbR0HueqZRj/7oratCR1VM5z0U/BlwD9EnamWpfohj2myRdD7wCfDrN2wwsB/qBN4HrACLiiKTbge1pudsi4khV9sLMzMoyYeinF2Q1xuxLRlk+gBvG2NYGYMNkGjQzs+rxO3LNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsI83/QSFmNdRawbtSzaYzH+mbmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9
mlhGHvplZRhz6ZmYZmTD0JW2QdFjS7pLarZIGJO1Mw/KSeTdJ6pf0gqTLSurLUq1f0trq74qZmU2knCP9+4Blo9TviojFadgMIOlcYCXw4bTOdyTNkDQDuAe4HDgXuCota2ZmdTThRytHxFOSWsvc3gqgOyKOAy9L6geWpHn9EfESgKTutOzeSXdsZmYVU0RMvFAx9B+NiPPS9K3AtcBvgOeANRFxVNK3gW0R8f203Hrg8bSZZRHxuVS/BlgaEatHua1OoBOgUChc2N3dPYXdq63BwUFaWloa3UbZ3O979Q0cq9q2CrPg0FtV21xNNaLXRfPOqHhdP3Ynp6OjY0dEtI02r9IvUbkXuB2I9PNO4LMVbutdIqIL6AJoa2uL9vb2amy2Jnp7e2nm/kZyv+91bRW/RGXNoiHu7Jse30vUiF73X91e8bp+7FZPRb/1iDg0PC7pe8CjaXIAmF+y6Dmpxjh1MzOrk4ou2ZQ0t2TyE8DwlT09wEpJp0paACwEngW2AwslLZB0CsUXe3sqb9vMzCox4ZG+pB8A7cBZkg4AtwDtkhZTPL2zH/g8QETskbSJ4gu0Q8ANEfF22s5q4AlgBrAhIvZUfW/MzGxc5Vy9c9Uo5fXjLH8HcMco9c3A5kl1Z2ZmVeV35JqZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZaSiL0Y3M5us1rWPVbzufctOr2IneXPo27Q2lSAxy5FP75iZZWTC0Je0QdJhSbtLamdK2iLpxfRzTqpL0t2S+iXtknRByTqr0vIvSlpVm90xM7PxlHOkfx+wbERtLbA1IhYCW9M0wOXAwjR0AvdC8Z8EcAuwFFgC3DL8j8LMzOpnwtCPiKeAIyPKK4CNaXwjcGVJ/f4o2gbMljQXuAzYEhFHIuIosIX3/iMxM7Maq/SF3EJEHEzjrwGFND4PeLVkuQOpNlb9PSR1UnyWQKFQoLe3t8IWa29wcLCp+xvpROx3zaKh+jRThsKs5upnPNOpVzgxH7uNMuWrdyIiJEU1mknb6wK6ANra2qK9vb1am6663t5emrm/kU7Efq9toqt31iwa4s6+6XFB3HTqFYqXbJ5oj91GqfTqnUPptA3p5+FUHwDmlyx3TqqNVTczszqqNPR7gOErcFYBj5TUP5Ou4rkIOJZOAz0BXCppTnoB99JUMzOzOprw+Z2kHwDtwFmSDlC8CmcdsEnS9cArwKfT4puB5UA/8CZwHUBEHJF0O7A9LXdbRIx8cdjMzGpswtCPiKvGmHXJKMsGcMMY29kAbJhUd2ZmVlV+R66ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGZk+78M2s2z1DRyr+CM39q+7osrdTG8+0jczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy8iUQl/Sfkl9knZKei7VzpS0RdKL6eecVJekuyX1S9ol6YJq7ICZmZWvGkf6HRGxOCLa0vRaYGtELAS2pmmAy4GFaegE7q3CbZuZ2STU4vTOCmBjGt8IXFlSvz+KtgGzJc2twe2bmdkYphr6AfxY0g5JnalWiIiDafw1oJDG5wGvlqx7INXMzKxOFBGVryzNi4gBSb8HbAH+GuiJiNklyxyNiDmSHgXWRcTTqb4VuDEinhuxzU6Kp38oFAoXdnd3V9xfrQ0ODtLS0tLoNsp2IvbbN3CsTt1MrDALDr3V6C7KM516han1u2jeGdVtpgyN/lvr6OjYUXLK/V2m9HWJETGQfh6W9DCwBDgkaW5EHEynbw6nxQeA+SWrn5NqI7fZBXQBtLW1RXt7+1RarKne3l6aub+RTsR+K/0KvVpYs2iIO/umxzeQTqdeYWr97r+6vbrNlKGZ/9Yq/q1LOh0
4KSLeSOOXArcBPcAqYF36+UhapQdYLakbWAocKzkNZBlrHSO41ywaaqpQNzsRTOVffQF4WNLwdv45Iv5N0nZgk6TrgVeAT6flNwPLgX7gTeC6Kdy2mVlZxjqoKNeJ9sXqFYd+RLwEfGSU+q+BS0apB3BDpbdnZmZT53fkmpllxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpaR6fMxe9bUpvr5JmZWHz7SNzPLiEPfzCwjDn0zs4w49M3MMuLQNzPLiK/esXf4ChyzE59D38xsHJUcDA1/v3MzftWiT++YmWXEoW9mlhGf3jnBjPdUdPgpp5nlq+5H+pKWSXpBUr+ktfW+fTOznNX1SF/SDOAe4OPAAWC7pJ6I2FvPPpqZr6Axs1qq9+mdJUB/RLwEIKkbWAGcUKHv4DYzmFoW1OrKH0VETTY86o1JnwSWRcTn0vQ1wNKIWF2yTCfQmSb/EHihbg1O3lnArxrdxCS439qaTv1Op17B/U7W70fE2aPNaLoXciOiC+hqdB/lkPRcRLQ1uo9yud/amk79Tqdewf1WU71fyB0A5pdMn5NqZmZWB/UO/e3AQkkLJJ0CrAR66tyDmVm26np6JyKGJK0GngBmABsiYk89e6iyaXEaqoT7ra3p1O906hXcb9XU9YVcMzNrLH8Mg5lZRhz6ZmYZcehXQNLtknZJ2inpx5Len+qSdHf6iIldki5odK8Akr4h6Repp4clzS6Zd1Pq9wVJlzWyz9TPpyTtkfS/ktpGzGuqXoc1+0eLSNog6bCk3SW1MyVtkfRi+jmnkT2WkjRf0pOS9qbHwhdSvel6lnSapGclPZ96/WqqL5D0THpM/DBduNIcIsLDJAfgd0rG/wb4bhpfDjwOCLgIeKbRvaa+LgVOTuNfB76exs8FngdOBRYAvwRmNLjXP6L4prxeoK2k3nS9pr5mpF4+AJySejy30X2N6PFPgQuA3SW1fwDWpvG1w4+JZhiAucAFafx9wH+k33/T9Zz+1lvS+EzgmfS3vwlYmerfBf6q0b0ODz7Sr0BE/KZk8nRg+NXwFcD9UbQNmC1pbt0bHCEifhwRQ2lyG8X3R0Cx3+6IOB4RLwP9FD8qo2EiYl9EjPYu7KbrNXnno0Ui4r+B4Y8WaRoR8RRwZER5BbAxjW8ErqxrU+OIiIMR8bM0/gawD5hHE/ac/tYH0+TMNARwMfBgqjdFr8Mc+hWSdIekV4Grga+k8jzg1ZLFDqRaM/ksxWcjMD36HdasvTZrXxMpRMTBNP4aUGhkM2OR1AqcT/EIuil7ljRD0k7gMLCF4jO/10sOtJrqMeHQH4Okn0jaPcqwAiAibo6I+cADwOrxt1Z7E/WblrkZGKLYc8OU06vVTxTPQTTdtduSWoAfAV8c8ey6qXqOiLcjYjHFZ9BLgA81uKVxNd1n7zSLiPizMhd9ANgM3EIDP2Zion4lXQv8OXBJ+oOBBvU7ifu2VLN+hEez9jWRQ5LmRsTBdArycKMbKiVpJsXAfyAiHkrlpu45Il6X9CTwUYqndk9OR/tN9ZjwkX4FJC0smVwB/CKN9wCfSVfxXAQcK3k62jCSlgF/B/xFRLxZMqsHWCnpVEkLgIXAs43osQzN2ut0/WiRHmBVGl8FPNLAXt5FkoD1wL6I+FbJrKbrWdLZw1fDSZpF8btC9gFPAp9MizVFr+9o9CvJ03GgeASyG9gF/CswL/7/lfx7KJ7T66Pk6pMG99tP8bzzzjR8t2TezanfF4DLm6DXT1A8B3ocOAQ80ay9lvS1nOIVJr8Ebm50P6P09wPgIPA/6b69HvhdYCvwIvAT4MxG91nS759QPHWzq+Qxu7wZewb+GPh56nU38JVU/wDFg5J+4F+AUxvd6/Dgj2EwM8uIT++YmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRv4PVCMY6yoPyiwAAAAASUVORK5CYII=\n", + 
"text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ - "### Goodness Metric\n", - "temp_goodness = sqrt( (noaa_min_temp - circle_min_temp)^2 + (noaa_max_temp - circle_max_temp)^2 )" + "noaa_max_temps.hist(bins = 20)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 319, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEICAYAAACzliQjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZwU1bn/8c/DOi4ICsQgcO8QgiwDCDggCkbCEtSLLGoQl4hi5LqggkYFcUtcriYKuODCzwW8IaAiBKPXBVmCGhUYGRWCKCqroCyCjICyPL8/6szYjLP0MDPMUt/369WvqTqn6tQ53T1PnT51utrcHRERiYcqZV0BERE5eBT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBvwyZ2VIz61bW9ShLZtbczDLNbLuZXV3W9SlpZjbPzH5f1vUQyaagX0rMbKWZ9cyVdpGZvZW97u5p7j6vkHJSzczNrFopVbWs3QDMdfda7v5gXhuYWW8zmx9ODBvN7J9m1jfkXWRme80sKzw+N7PLc+3fwMyeNLP1oYyPzeyPZnbYQWhfvszs9vDaDkxIqxbSUhPSTjKzOaHu28zsH2bWKo/yLgr7nlPAMeeZ2TdmVrOQus0LZR2XK31GSO9WhKYWyMz+I+H1ywrlf5ewfnJJHUsU9GOvHJxM/hNYml+mmZ0NPA88AzQCjgZuBc5I2Owddz/c3Q8HzgL+bGbtw/5HAe8AhwAnunstoBdQB2ha8s0psi3AH82sal6ZZnYi8DowEzgGaAJ8ALxtZr/ItfngUN6F+ZSVCpwMONA3ibp9kliWmdUFTgQ2JrFv0tx9dfbrF15DgOMS0t4syePFnYJ+GUr8NGBmncxskZl9a2ZfmdmYsNn88Hdr6PWcaGZNQ89vs5ltMrPJZlYnodwOZrY49AyfN7NnzezOkNfNzNaa2Y1mtgF42syONLOXQi/6m7DcKKG8eWZ2p5n9K9ThH2ZWNxz3WzNbmNgzzaOdfcNQ1tZQVsuQPgf4NfBwKPfYXPsZMAa4w92fcPdt7r7P3f/p7pfmdSx3XwwsA1qGpGuB7cAF7r4ybLPG3a9x9w/zqe/zZrYh9Krnm1laQt5EMxtvZi+H5/c9M2uakN8rfJLYZmYPA5bf8xK8CvwAXJBP/p+BZ9z9AXff7u5b3P1m4F3g9oTj/idwCjAU6G1mP8+jrAvDfhOJThCFmQyck3BCOheYEeqbfdxOZvZOeG3Xm9nDZlYj5J0U3p+Nw/px4f3VIoljZ5df08zuM7PV4f/iMTM7JORlv5dvMLOvw/H7m9npZvaJmW0xs5sSyrrdzKaF/4ftZvZ+4ieZ8D+xLuQtN7MeydazQnF3PUrhAawEeuZKuwh4K69tiHqjvwvLhwOdw3IqUc+sWsJ+vyTqrdYE6hOdGMaFvBrAKuAaoDpwJtE/6Z0hvxuwB7g37H8IUJeoh3woUIuoZ/33hOPNA1YQ9YxrA/8m6gX2BKoR9cKfzud5OBb4LtS3OtFwzgqgRkLZv89n3xah7U0KeJ5zP6cdga3AsWH9Xe
CPRXzthoTnoSYwDshMyJsIbAY6hbZPBqaGvHpEJ5izQ1tHhOc6v/bdDvyVqNf9edinWmhzang99gK/zmPfi4H1Ceu3AAvC8kfAdXnsswK4Ajge2A0cXcBzMA/4PdGnjNNC2gKinv5aoFtIOx7oHOqdSnTCHZ5Qzl3AnPA++wgYlsTz78Avw/JY4EXgqPCa/AP4n1zv5VvDc3cp0aeQv4Vt04Cd2e+f8HzvTnh9/gB8EZabA2uAYxL+75qWdRwpjUeZV6CyPogCelYIQNmPHeQf9OcDfwTq5SonlVxBP49j9QcWh+VfAesAS8h/i/2D/g9ASgHltQO+SVifB4xOWL8feCVh/QwSAmOusm4BnktYrxLq1y2h7PyCYpfQ9oLqelH4x99KFHAdeCi7/cCnwGXFeB3rhDJrh/WJwBMJ+acDH4flC4F3E/KMKEAWGPTD8nvA5ewf9BuF5RZ57HsqsDth/VNCsAVGAR/k2r4rUcCrF9Y/BkYU0O55REH/AmAK0Qn4k5CXE/Tz2G84MCNhvTqQQRTwX018XxZwbCfq2BhRh6FpQt6JwBcJ7+WdQNWwXivse0LC9hlA/4TnO/H1qQKsJxry+iXwNVFHpvqBvl8qwkPDO6Wrv7vXyX4Q9bLycwlRr/jjMFzSJ78NzexoM5saPop+S9RbrBeyjwHWeXhXB2tyFbHR3XcllHeomT1uZqtCefOBOrnGmb9KWN6Zx/rh5O0Yok8eALj7vlCfhvm1L8Hm8LdBIdu9G57jWsDPiXp4dyeUUdj+OcysqpndY2afhediZciql7DZhoTlHfzY9mNIeK7Da5D7uc/PzcBoICUh7RtgXz71bwBsCnXuQjTWPzXk/Q1oY2btErYfDLzu7psStklmiGc60B0YBvxv7kwzOzYMB24Iz9fdJDxX7r6b6ETZGrg/1/uyMPWJPu1khOGjrUQnjvoJ22x2971heWf4W9B7M/H12Ud0AjvG3VcQnbBuB74O/1/HFKGuFYaCfjnh7p+6+7nAz4iGXqZZNLskr3+Su0N6G3c/gqg3lj12vB5oGMbDszXOfbhc69cRfbw9IZT3q5Be2Hh0Mr4kulgbFRjVqzFRb78wy4n+Sc9K9mDu/hXwAj9e6H0DGGBmyb7XzwP6EfX4ahP1uCG552I9Cc91QlsL5e6z+HH4JTvtO6Jhv9/msctAYHZYHhzqlxmu07yXkE4YAx8InBKC8waioafjLNfsnDzqtQN4hehTyE+CPvAo0aeGZuG9cxMJz5WZNQRuA54G7rdCZg3lsokoaKcldJ5q+48Xew9E4utThejT1JcA7v43d+9K9H51ov/DSkdBv5wwswvMrH7ofWwNyfuIxij3AYkzNWoRDR1tC/9U1yfkvUM0DjzMoul//YjGnwtSi+ifa6tFs11uK3aDfvQc8F9m1sPMqhOdYL4H/lXYjqFXeC1wi5ldbGZHmFkVM+tqZhPy2seiGSYD+HFG0BjgCGBSuNiJmTU0szFm1jaPImqF+m0m6mXencc2+XkZSDOzMy2aFXU10SePZI0muuaRaCQw2MyuNrNaFl10v5NomOOPZpZCFNCHEg3LZT+uAs4L9ehP9J5olZDfEniTfGb65HITcIqHC+G51AK+BbLCBdqc6bLhpDcReJLok+x64I4kjgfk9MT/HzDWzH4WymxoZr2TLSMPxye8PsOJXut3Lfq+SPdwUtpF9P+wrxjHKbcU9MuPU4GlZpYFPAAMcvedoad1F9EUva1m1plo7L8DsI0o0EzPLsTdfyC6eHsJ0cnjAuAlojd3fsYRXWjbRHTh89WSapS7Lw91eCiUfwZwRqhnMvtPA84hurj6JdFH9zuJpjBmO9HCnG6iC4kbiYIe7r4FOIloPPs9M9tO1EPeRtSzzu0ZouGodUQXrN8tQls3EfXK7yE6aTQD3i7C/m8TXSxNTHsL6E30mq4PdWsPdHX3T4kC+k6iGT4bsh/AU0TXB04l6vE/7dHUyMRtHgbOt0Km7br7l6
EeefkD0aej7UQB+tmEvKuJPrneEk7gFwMXW9Hm3d9I9Dq9G4aP3iD6VHqgZhK9n74BfgecGYagahK9bpuIhu9+RnRtpNLJvtgllZiZvQc85u5Pl3VdRMqKmd1ONCsov+mxsaCefiVkZqeY2c/D8M5goC0l2HsXkYqrrL+NKaWjOdFY+mFE87/Pdvf1ZVslESkPNLwjIhIjGt4REYmRcj28U69ePU9NTS3raoiIVCgZGRmb3L1+XnnlOuinpqayaNGisq6GiEiFYmar8svT8I6ISIwo6IuIxIiCvohIjJTrMX0ROTC7d+9m7dq17Nq1q/CNpcJKSUmhUaNGVK9ePel9FPRFKqG1a9dSq1YtUlNT2f+Gq1JZuDubN29m7dq1NGnSJOn9NLwjUgnt2rWLunXrKuBXYmZG3bp1i/xpTkFfpJJSwK/8DuQ1VtAXEYkRjemLxMDYWZ+UaHkjeh1b6DYbNmxg+PDhLFy4kDp16nD00Uczbtw4atSoQcuWLWnevDnuzmGHHcbTTz9N8+bRbfJfeeUVbrnlFnbs2EHNmjXp3r07999//35lT5w4kYsvvphZs2bRs2dPAP7+978zYMAAnn/+ec4+++wDbtuVV17J22+/zQ8//MAXX3yRU6+bb765WOWWFwr6IqWkuIE2mcBaXrk7AwYMYPDgwUydGv107wcffMBXX31F48aNadq0KZmZmQA8/vjj3H333UyaNIklS5YwbNgwXn75ZVq0aMHevXuZMCHPH0mjTZs2TJ06NSfoT5kyheOOK/DXH5Myfvx4AFauXEmfPn1y6llZaHhHRErc3LlzqV69OpdddllO2nHHHcfJJ//0R7O+/fZbjjzySAD+/Oc/M3r0aFq0aAFA1apVufzyy3+yD8DJJ5/MggUL2L17N1lZWaxYsYJ27X78Lfg//elPdOzYkdatWzN06FDcnT179tCxY0fmzZsHwKhRoxg9enSh7fnuu+8YMmQInTp1on379sycGf1w28SJE+nfvz+9evUiNTWVhx9+mDFjxtC+fXs6d+7Mli1bAOjWrRvXXHMN7dq1o3Xr1ixYEP1A2j//+U/atWtHu3btaN++Pdu3by+0LsWloC8iJW7JkiUcf/zx+eZ/9tlntGvXjqZNmzJmzBiuvfbapPZLZGb07NmT1157jZkzZ9K3b9/98ocNG8bChQtZsmQJO3fu5KWXXqJatWpMnDiRyy+/nDfeeINXX32V224r/Ceh77rrLrp3786CBQuYO3cu119/Pd99911OnadPn87ChQsZPXo0hx56KIsXL+bEE0/kmWeeySljx44dZGZm8sgjjzBkyBAA7rvvPsaPH09mZiZvvvkmhxxySFJtLw4FfRE56LKHdz777DPGjRvH0KFDD6icQYMGMXXqVKZOncq55567X97cuXM54YQTaNOmDXPmzGHp0qUApKWl8bvf/Y4+ffrw1FNPUaNGjUKP8/rrr3PPPffQrl07unXrxq5du1i9ejUAv/71r6lVqxb169endu3anHHGGUA0/LRy5cqcMrLr96tf/Ypvv/2WrVu30qVLF6699loefPBBtm7dSrVqpT/irqAvIiUuLS2NjIyMpLbt27cv8+fPL/J+AJ06deKjjz5i06ZNHHvsj9dAdu3axRVXXMG0adP46KOPuPTSS/ebz/7RRx9Rp04dvv7666SO4+688MILZGZmkpmZyerVq2nZsiUANWvWzNmuSpUqOetVqlRhz549OXm5p1eaGSNHjuSJJ55g586ddOnShY8//jjpth8oBX0RKXHdu3fn+++/3+8i7Icffsibb775k23feustmjZtCsD111/P3XffzSefRBfB9+3bx2OPPVbgse655x7uvvvu/dKyA3y9evXIyspi2rRpOXnTp09ny5YtzJ8/n6uuuoqtW7cW2p7evXvz0EMPkf1Lg4sXLy50n9yeffZZIGpv7dq1qV27Np999hlt2rThxhtvpGPHjgcl6Gv2jkgMHOyZQGbGjBkzGD
58OPfeey8pKSmkpqYybtw44McxfXenRo0aPPHEEwC0bduWcePGce6557Jjxw7MjD59+hR4rNNOO+0naXXq1OHSSy+ldevW/PznP6djx44AbNq0iZEjRzJ79mwaN27MsGHDuOaaa5g0aVKBx7jlllsYPnw4bdu2Zd++fTRp0oSXXnqpSM9JSkoK7du3Z/fu3Tz11FMAjBs3jrlz51KlShXS0tLybEtJK9e/kZuenu76ERWpqMpyyuayZctyhh+k7HXr1o377ruP9PT0Ei87r9fazDLcPc+DaXhHRCRGNLwjIlLKsr8XUB6opy8iEiMK+iIiMaKgLyISIwr6IiIxogu5InEw939Ktrxfjyp0EzPj2muvzbkt8n333UdWVha33347ABMmTGDMmDEAHHHEEYwZM4auXbvm7J+ZmUn79u155ZVXOPXUU/cre9OmTTRo0ICHHnpov5u6JerWrRuff/45q1atyvk2bP/+/XnjjTfIysoqcpOzbd68mR49egDR7aOrVq1K/fr1AViwYEFSt3UoS+rpi0ipqFmzJtOnT2fTpk0/yXvppZd4/PHHeeutt/j444957LHHOO+889iwYUPONlOmTKFr165MmTLlJ/s///zzdO7cOc+8RHXq1OHtt98GYOvWraxfv76YrYK6devm3I7hsssuY8SIETnr5T3gg4K+iJSSatWqMXToUMaOHfuTvHvvvZe//OUv1KtXD4AOHTowePDgnHvZuzvPP/88EydOZNasWT/5HdgpU6Zw//33s27dOtauXZtvHbJvyAbR7RfOPPPMnLysrCx69OhBhw4daNOmTc7tkhcuXEjbtm3ZtWsX3333HWlpaSxZsqTQ9mZkZHDKKadw/PHH07t375wTTLdu3RgxYgTp6em0bNmShQsXcuaZZ9KsWTNuvvlmILp3f4sWLTj//PNp2bIlZ599Njt27ABg5MiRtGrVirZt2/KHP/yh0HoURkFfRErNlVdeyeTJk9m2bdt+6UuXLv3JLZTT09Nz7oT5r3/9iyZNmtC0aVO6devGyy+/nLPdmjVrWL9+PZ06dWLgwIE597TJS48ePZg/fz579+5l6tSpnHPOOTl5KSkpzJgxg/fff5+5c+dy3XXX4e507NiRvn37cvPNN3PDDTdwwQUX0Lp16wLbuXv3bq666iqmTZtGRkYGQ4YM2e8+/TVq1GDRokVcdtll9OvXj/Hjx7NkyRImTpzI5s2bAVi+fDlXXHEFy5Yt44gjjuCRRx5h8+bNzJgxg6VLl/Lhhx/mnCSKQ0FfRErNEUccwYUXXsiDDz5YpP2mTJnCoEGDgKi3njiM8+yzzzJw4MA883KrWrUqXbt2ZerUqezcuZPU1NScPHfnpptuom3btvTs2ZN169bx1VdfAXDrrbcya9YsFi1axA033FBofZcvX86SJUvo1asX7dq1484779zvE0j2vf7btGlDWloaDRo0oGbNmvziF79gzZo1ADRu3JguXboAcMEFF+TcmC0lJYVLLrmE6dOnc+ihhybz9BVIF3JFpFQNHz6cDh06cPHFF+ektWrVioyMDLp3756TlpGRQVpaGnv37uWFF15g5syZ3HXXXbg7mzdvZvv27dSqVYspU6awYcMGJk+eDMCXX37Jp59+SrNmzfI8/qBBgxgwYEDOBeRskydPZuPGjWRkZFC9enVSU1NzhpE2b95MVlYWu3fvZteuXRx22GEFttHdSUtL45133skzP/F2y7lvxZx9++W8br1crVo1FixYwOzZs5k2bRoPP/wwc+bMKbAuhVFPX0RK1VFHHcXAgQN58sknc9JuuOEGbrzxxpyhjczMTCZOnMgVV1zB7Nmzadu2LWvWrGHlypWsWrWKs846ixkzZvDJJ5+QlZXFunXrWLlyJStXrmTUqFEF9vZPPvlkRo0a9ZMfWdm2bRs/+9nPqF69OnPnzmXVqlU5ef/93//NHXfcwfnnn8+NN95YaBubN2/Oxo0bc4L+7t27c4aqkrV69e
qc/f/2t7/RtWtXsrKy2LZtG6effjpjx47lgw8+KFKZeVFPXyQOkphiWZquu+46Hn744Zz1vn37sm7dOk466STMjFq1avHXv/6VBg0acNNNNzFgwID99j/rrLN49NFH+eKLL/LMO+ecc7j11lvzPLaZ5XkB9Pzzz+eMM86gTZs2pKen5/wu7zPPPEP16tU577zz2Lt3LyeddBJz5szZ71NJbjVq1GDatGlcffXVbNu2jT179jB8+HDS0tKSfo6aN2/O+PHjGTJkCK1ateLyyy9n27Zt9OvXj127duHuOVNci0O3VhYpJbq1siRr5cqV9OnTJ6lZQrnp1soiIpIvBX0RkTKWmpp6QL38A6GgL1JJleehWykZB/IaK+iLVEIpKSls3rxZgb8Sy57KmpKSUqT9NHtHpBJq1KgRa9euZePGjWVdFSlFKSkpNGrUqEj7KOiLVELVq1enSZMmZV0NKYeSGt4xsxFmttTMlpjZFDNLMbMmZvaema0ws2fNrEbYtmZYXxHyUxPKGRXSl5tZ79JpkoiI5KfQoG9mDYGrgXR3bw1UBQYB9wJj3f2XwDfAJWGXS4BvQvrYsB1m1irslwacCjxiZlVLtjkiIlKQZC/kVgMOMbNqwKHAeqA7MC3kTwL6h+V+YZ2Q38Oim0r0A6a6+/fu/gWwAuhU/CaIiEiyCg367r4OuA9YTRTstwEZwFZ33xM2Wws0DMsNgTVh3z1h+7qJ6XnsIyIiB0EywztHEvXSmwDHAIcRDc+UCjMbamaLzGyRZh6IiJSsZIZ3egJfuPtGd98NTAe6AHXCcA9AI2BdWF4HNAYI+bWBzYnpeeyTw90nuHu6u6dn/+6kiIiUjGSC/mqgs5kdGsbmewD/BuYCZ4dtBgMzw/KLYZ2QP8ejb4i8CAwKs3uaAM2ABSXTDBERSUah8/Td/T0zmwa8D+wBFgMTgJeBqWZ2Z0jLvln2k8D/mtkKYAvRjB3cfamZPUd0wtgDXOnue0u4PSIiUoCkvpzl7rcBt+VK/pw8Zt+4+y7gt/mUcxdwVxHrKCIiJUT33hERiREFfRGRGFHQFxGJEQV9EZEYUdAXEYkRBX0RkRhR0BcRiREFfRGRGFHQFxGJEQV9EZEY0W/kipRTY2d9Uqz9R/Q6toRqIpWJevoiIjGioC8iEiMK+iIiMaKgLyISIwr6IiIxoqAvIhIjCvoiIjGioC8iEiP6cpZIPor75SiR8kg9fRGRGFHQFxGJEQV9EZEYUdAXEYkRBX0RkRhR0BcRiREFfRGRGFHQFxGJEQV9EZEYUdAXEYkRBX0RkRhR0BcRiREFfRGRGFHQFxGJEQV9EZEYSSrom1kdM5tmZh+b2TIzO9HMjjKzWWb2afh7ZNjWzOxBM1thZh+aWYeEcgaH7T81s8Gl1SgREclbsj39B4BX3b0FcBywDBgJzHb3ZsDssA5wGtAsPIYCjwKY2VHAbcAJQCfgtuwThYiIHByFBn0zqw38CngSwN1/cPetQD9gUthsEtA/LPcDnvHIu0AdM2sA9AZmufsWd/8GmAWcWqKtERGRAiXT028CbASeNrPFZvaEmR0GHO3u68M2G4Cjw3JDYE3C/mtDWn7p+zGzoWa2yMwWbdy4sWitERGRAiUT9KsBHYBH3b098B0/DuUA4O4OeElUyN0nuHu6u6fXr1+/JIoUEZEgmaC/Fljr7u+F9WlEJ4GvwrAN4e/XIX8d0Dhh/0YhLb90ERE5SAoN+u6+AVhjZs1DUg/g38CLQPYMnMHAzLD8InBhmMXTGdgWhoFeA35jZkeGC7i/CWkiInKQVEtyu6uAyWZWA/gcuJjohPGcmV0CrAIGhm3/DzgdWAHsCNvi7lvM7A5gYdjuT+6+pURaISIiSUkq6Lt7JpCeR1aPPLZ14Mp8ynkKeKooFRQRkZKjb+SKiMSIgr6ISIwo6IuIxIiCvohIjCjoi4jEiIK+iEiMKOiLiMSIgr6ISIwo6IuIxIiCvohIjCR77x2RCmfsrE
/Kugoi5Y56+iIiMaKevkglVdxPOiN6HVtCNZHyREFfRPKkk0blpOEdEZEYUdAXEYkRBX0RkRhR0BcRiREFfRGRGFHQFxGJEQV9EZEYUdAXEYkRBX0RkRhR0BcRiREFfRGRGFHQFxGJEQV9EZEYUdAXEYkRBX0RkRhR0BcRiREFfRGRGFHQFxGJEQV9EZEYUdAXEYkRBX0RkRhJOuibWVUzW2xmL4X1Jmb2npmtMLNnzaxGSK8Z1leE/NSEMkaF9OVm1rukGyMiIgUrSk//GmBZwvq9wFh3/yXwDXBJSL8E+Cakjw3bYWatgEFAGnAq8IiZVS1e9UVEpCiSCvpm1gj4L+CJsG5Ad2Ba2GQS0D8s9wvrhPweYft+wFR3/97dvwBWAJ1KohEiIpKcZHv644AbgH1hvS6w1d33hPW1QMOw3BBYAxDyt4Xtc9Lz2CeHmQ01s0Vmtmjjxo1FaIqIiBSm0KBvZn2Ar9094yDUB3ef4O7p7p5ev379g3FIEZHYqJbENl2AvmZ2OpACHAE8ANQxs2qhN98IWBe2Xwc0BtaaWTWgNrA5IT1b4j4iUsmMnfVJsfYf0evYEqqJJCq0p+/uo9y9kbunEl2InePu5wNzgbPDZoOBmWH5xbBOyJ/j7h7SB4XZPU2AZsCCEmuJiIgUKpmefn5uBKaa2Z3AYuDJkP4k8L9mtgLYQnSiwN2XmtlzwL+BPcCV7r63GMcXEZEiKlLQd/d5wLyw/Dl5zL5x913Ab/PZ/y7grqJWUkRESoa+kSsiEiMK+iIiMaKgLyISIwr6IiIxoqAvIhIjCvoiIjGioC8iEiMK+iIiMaKgLyISI8W5DYNIqSruDbtE5KfU0xcRiRH19EVKSefVE4q1/7v/MbSEaiLyI/X0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRhT0RURiREFfRCRGFPRFRGJEQV9EJEYU9EVEYkT33hEpp3TvHikN6umLiMSIevoiUi4V9/cURvQ6toRqUrmopy8iEiMK+iIiMaKgLyISIwr6IiIxoqAvIhIjCvoiIjGioC8iEiMK+iIiMVLol7PMrDHwDHA04MAEd3/AzI4CngVSgZXAQHf/xswMeAA4HdgBXOTu74eyBgM3h6LvdPdJJdscEZGIvtyVt2S+kbsHuM7d3zezWkCGmc0CLgJmu/s9ZjYSGAncCJwGNAuPE4BHgRPCSeI2IJ3o5JFhZi+6+zcl3SiRklDce9+IlEeFDu+4+/rsnrq7bweWAQ2BfkB2T30S0D8s9wOe8ci7QB0zawD0Bma5+5YQ6GcBp5Zoa0REpEBFGtM3s1SgPfAecLS7rw9ZG4iGfyA6IaxJ2G1tSMsvPfcxhprZIjNbtHHjxqJUT0RECpF00Dezw4EXgOHu/m1inrs70ZBNsbn7BHdPd/f0+vXrl0SRIiISJBX0zaw6UcCf7O7TQ/JXYdiG8PfrkL4OaJywe6OQll+6iIgcJIUG/TAb50lgmbuPSch6ERgclgcDMxPSL7RIZ2BbGAZ6DfiNmR1pZkcCvwlpIiJykCQze6cL8NLU0ggAAAY7SURBVDvgIzPLDGk3AfcAz5nZJcAqYGDI+z+i6ZoriKZsXgzg7lvM7A5gYdjuT+6+pURaISIlTr/cVTkVGvTd/S3A8snukcf2DlyZT1lPAU8VpYIicmA05VTyom/kiojEiH4uUUQkD5X1G73q6YuIxIh6+lJpaUxb5KfU0xcRiREFfRGRGNHwjpSa4l4IE5GSp56+iEiMqKcvIqVC3+gtn9TTFxGJEQV9EZEYUdAXEYkRjelLvjT7RqTyUU9fRCRGFPRFRGJEQV9EJEYU9EVEYkRBX0QkRjR7R8ot3RpZpOSppy8iEiPq6YtIuaR795QO9fRFRGJEQV9EJEY0vCMilZKGh/Kmnr6ISIyopy8iUgqKe8PCEb2OLaGa7E89fRGRGFFPvxLTrZFFJDcFfRGRPFTWC8EK+lJqdBsFkfJHQb8c0/CMiJ
Q0XcgVEYkR9fQlXxqeEal81NMXEYkR9fRLkcbkRaS8OehB38xOBR4AqgJPuPs9B7sOcaHhGZGyU/z/v/tKpB65HdSgb2ZVgfFAL2AtsNDMXnT3fx/MeiRLPXURqWwOdk+/E7DC3T8HMLOpQD+gVIJ+cYO2esoiUtkc7KDfEFiTsL4WOCFxAzMbCmR/lS3LzJYX43j1gE3F2L+8qCztALWlPKos7YDK1Jbf31+ctvxnfhnl7kKuu08ASqSLbWaL3D29JMoqS5WlHaC2lEeVpR2gtiTjYE/ZXAc0TlhvFNJEROQgONhBfyHQzMyamFkNYBDw4kGug4hIbB3U4R1332Nmw4DXiKZsPuXuS0vxkJXlSmxlaQeoLeVRZWkHqC2FMncvjXJFRKQc0m0YRERiREFfRCRGKl3QN7M7zOxDM8s0s9fN7JiQbmb2oJmtCPkdyrquhTGzv5jZx6G+M8ysTkLeqNCW5WbWuyzrmQwz+62ZLTWzfWaWniuvorXl1FDXFWY2sqzrUxRm9pSZfW1mSxLSjjKzWWb2afh7ZFnWMVlm1tjM5prZv8N765qQXqHaY2YpZrbAzD4I7fhjSG9iZu+F99mzYfJL8bl7pXoARyQsXw08FpZPB14BDOgMvFfWdU2iLb8BqoXle4F7w3Ir4AOgJtAE+AyoWtb1LaQtLYHmwDwgPSG9QrWFaALCZ8AvgBqh7q3Kul5FqP+vgA7AkoS0PwMjw/LI7PdZeX8ADYAOYbkW8El4P1Wo9oSYdHhYrg68F2LUc8CgkP4YcHlJHK/S9fTd/duE1cOA7CvV/YBnPPIuUMfMGhz0ChaBu7/u7nvC6rtE32uAqC1T3f17d/8CWEF0i4tyy92XuXte366uaG3JuZWIu/8AZN9KpEJw9/nAllzJ/YBJYXkS0P+gVuoAuft6d38/LG8HlhF9679CtSfEpKywWj08HOgOTAvpJdaOShf0AczsLjNbA5wP3BqS87oFRMODXbdiGEL0SQUqflsSVbS2VLT6JuNod18fljcAR5dlZQ6EmaUC7Yl6yRWuPWZW1cwyga+BWUSfJrcmdPpK7H1WIYO+mb1hZkvyePQDcPfR7t4YmAwMK9vaFqywtoRtRgN7iNpTbiXTFinfPBpLqFDzuM3scOAFYHiuT/oVpj3uvtfd2xF9mu8EtCitY5W7e+8kw917JrnpZOD/gNsop7eAKKwtZnYR0AfoEd7AUEHbko9y2ZYCVLT6JuMrM2vg7uvDkOfXZV2hZJlZdaKAP9ndp4fkCtsed99qZnOBE4mGoKuF3n6Jvc8qZE+/IGbWLGG1H/BxWH4RuDDM4ukMbEv4CFguhR+cuQHo6+47ErJeBAaZWU0zawI0AxaURR1LQEVrS2W8lciLwOCwPBiYWYZ1SZqZGfAksMzdxyRkVaj2mFn97Jl5ZnYI0e+NLAPmAmeHzUquHWV95boUroS/ACwBPgT+ATRMuEI+nmis7CMSZpCU1wfRRc01QGZ4PJaQNzq0ZTlwWlnXNYm2DCAal/we+Ap4rQK35XSimSKfAaPLuj5FrPsUYD2wO7welwB1gdnAp8AbwFFlXc8k29KVaOjmw4T/kdMrWnuAtsDi0I4lwK0h/RdEHaAVwPNAzZI4nm7DICISI5VueEdERPKnoC8iEiMK+iIiMaKgLyISIwr6IiIxoqAvIhIjCvoiIjHy/wGJzVdqr5Bt1QAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ - "temp_df['temp_goodness'] = round(np.sqrt(((temp_df['noaa_min_temp'] - temp_df['circle_min_temp'])**2) + ((temp_df['noaa_max_temp'] - temp_df['circle_max_temp'])**2)),2)" + "import random\n", + "import numpy\n", + "from matplotlib import pyplot\n", + "\n", + "x = cbc_max_temps.tolist()\n", + "y = noaa_max_temps.tolist()\n", + "\n", + "bins = numpy.linspace(-30, 30, 20)\n", + "\n", + "pyplot.hist(x, bins, alpha=0.5, label='CBC Max Temps')\n", + "pyplot.hist(y, bins, alpha=0.5, label='NOAA Max Temps')\n", + "pyplot.legend(loc='upper right')\n", + "pyplot.title(\"Histagram of CBC and NOAA Max Temps\")\n", + "pyplot.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Catagories\n", - "Values in catagories can be changed and then applied to dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "# Function to assign grade scores\n", - "def assign_grade(metric_score):\n", - " if metric_score <= excellent_score:\n", - " return 'excellent'\n", - " elif metric_score <= good_score:\n", - " return 'good'\n", - " elif metric_score <= fair_score:\n", - " return 'fair'\n", - " else:\n", - " return 'poor'" + "#### Preform a two sample T Test " ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 321, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Ttest_indResult(statistic=-4.415955625255944, pvalue=1.0088112774678262e-05)" + ] + }, + "execution_count": 321, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Applying the scores\n", - "temp_df['goodness_grade'] = temp_df['temp_goodness'].apply(lambda metric_score: assign_grade(metric_score))" + "scipy.stats.ttest_ind(cbc_max_temps.tolist(), noaa_max_temps.tolist(), equal_var=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - 
"---" + "# Goodness Metric for Max and Min Temp points " ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 334, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The goodness Metric of the closest stations were: \n", + "poor 59144\n", + "excellent 6003\n", + "good 2639\n", + "fair 604\n", + "Name: goodness_closest, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "poor 86.480480\n", + "excellent 8.777599\n", + "good 3.858751\n", + "fair 0.883170\n", + "Name: goodness_closest, dtype: float64\n" + ] + } + ], "source": [ - "## Ecosystem Split\n", - "Creating two dataframes based on matching min and macro ecosystems" + "# The Closest Stations' Goodness metric was, \n", + "print(\"The goodness Metric of the closest stations were: \")\n", + "print(circle_data.goodness_closest.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest stations as percentage: \")\n", + "print(circle_data.goodness_closest.value_counts(dropna = False) / circle_data.goodness_closest.size * 100)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 335, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The goodness Metric of the closest stations were: \n", + "poor 58929\n", + "excellent 6062\n", + "good 2535\n", + "fair 602\n", + "NaN 262\n", + "Name: goodness_lowest, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "poor 86.166106\n", + "excellent 8.863869\n", + "good 3.706682\n", + "fair 0.880246\n", + "NaN 0.383097\n", + "Name: goodness_lowest, dtype: float64\n" + ] + } + ], "source": [ - "#### Specific Ecosystem Match" + "# The Lowesr Stations' Goodness metric was, \n", + "print(\"The goodness Metric of the closest stations were: \")\n", + "print(circle_data.goodness_lowest.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest 
stations as percentage: \")\n", + "print(circle_data.goodness_lowest.value_counts(dropna = False) / circle_data.goodness_lowest.size * 100)\n" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 336, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows before specific ecosystem match: 16986\n", - "Number of rows after specific ecosystem match: 16160\n", - "\n", - "Number of rows lost: 826\n" + "The goodness Metric of ALL stations were: \n", + "poor 61828\n", + "NaN 3515\n", + "excellent 2040\n", + "good 809\n", + "fair 198\n", + "Name: goodness_mode, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "poor 90.405030\n", + "NaN 5.139640\n", + "excellent 2.982892\n", + "good 1.182921\n", + "fair 0.289516\n", + "Name: goodness_mode, dtype: float64\n" ] } ], "source": [ - "temp_df_specific_ecosystems = temp_df.loc[temp_df['specific_circle_ecosystem'].isna() == temp_df['specific_station_ecosystem'].isna()]\n", - "print(f'Number of rows before specific ecosystem match: {temp_df.shape[0]}')\n", - "print(f'Number of rows after specific ecosystem match: {temp_df_specific_ecosystems.shape[0]}')\n", - "print()\n", - "print(f'Number of rows lost: {temp_df.shape[0] - temp_df_specific_ecosystems.shape[0]}')" + "# The Mode of all Stations for for each Circles were\n", + "print(\"The goodness Metric of ALL stations were: \")\n", + "print(circle_data.goodness_mode.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest stations as percentage: \")\n", + "print(circle_data.goodness_mode.value_counts(dropna = False) / circle_data.goodness_mode.size * 100)\n" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 337, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The goodness Metric of NLCD stations were: \n", + "poor 33962\n", + "NaN 29820\n", + "excellent 3104\n", + "good 1232\n", + 
"fair 272\n", + "Name: n_goodness_mode, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "poor 49.659307\n", + "NaN 43.602866\n", + "excellent 4.538675\n", + "good 1.801433\n", + "fair 0.397719\n", + "Name: n_goodness_mode, dtype: float64\n" + ] + } + ], "source": [ - "#### Macro Ecosystem Match" + "# The Mode of all NLCD Stations for for each Circles were\n", + "print(\"The goodness Metric of NLCD stations were: \")\n", + "print(circle_data.n_goodness_mode.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest stations as percentage: \")\n", + "print(circle_data.n_goodness_mode.value_counts(dropna = False) / circle_data.n_goodness_mode.size * 100)\n" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 338, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows before macro ecosystem match: 16986\n", - "Number of rows after macro ecosystem match: 14798\n", - "\n", - "Number of rows lost: 2188\n" + "The goodness Metric of NLCD stations were: \n", + "NaN 36555\n", + "poor 27484\n", + "excellent 2945\n", + "good 1157\n", + "fair 249\n", + "Name: e_goodness_mode, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "NaN 53.450797\n", + "poor 40.187162\n", + "excellent 4.306185\n", + "good 1.691768\n", + "fair 0.364088\n", + "Name: e_goodness_mode, dtype: float64\n" ] } ], "source": [ - "temp_df_macro_ecosystems = temp_df.loc[temp_df['macro_circle_ecosystem'].isna() == temp_df['macro_station_ecosystem'].isna()]\n", - "print(f'Number of rows before macro ecosystem match: {temp_df.shape[0]}')\n", - "print(f'Number of rows after macro ecosystem match: {temp_df_macro_ecosystems.shape[0]}')\n", - "print()\n", - "print(f'Number of rows lost: {temp_df.shape[0] - temp_df_macro_ecosystems.shape[0]}')" + "# The Mode of all ECOSYS Stations for for each Circles were\n", + "print(\"The goodness Metric of NLCD stations 
were: \")\n", + "print(circle_data.e_goodness_mode.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest stations as percentage: \")\n", + "print(circle_data.e_goodness_mode.value_counts(dropna = False) / circle_data.e_goodness_mode.size * 100)\n" ] }, { @@ -942,7 +4904,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 323, "metadata": {}, "outputs": [], "source": [ @@ -1035,9 +4997,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 324, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'temp_df_specific_ecosystems' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mgrade_figure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp_df_specific_ecosystems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'temp_df_specific_ecosystems' is not defined" + ] + } + ], "source": [ "grade_figure(temp_df_specific_ecosystems)" ] @@ -1057,6 +5031,27 @@ "source": [ "grade_figure(temp_df_macro_ecosystems)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -1075,7 +5070,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.7.1" } }, "nbformat": 4, From 029b793fe5096fa872cb9e734ba3acd2cfe0dcd3 Mon Sep 17 00:00:00 2001 From: ren Date: Thu, 8 
Oct 2020 19:26:26 -0400 Subject: [PATCH 2/2] # Moved agreement threshold to top of notebook --- notebooks/2.4-je-temperature-summary.ipynb | 117 +++++++++++++++++++-- 1 file changed, 107 insertions(+), 10 deletions(-) diff --git a/notebooks/2.4-je-temperature-summary.ipynb b/notebooks/2.4-je-temperature-summary.ipynb index 20c5f5e..4ebbcc6 100644 --- a/notebooks/2.4-je-temperature-summary.ipynb +++ b/notebooks/2.4-je-temperature-summary.ipynb @@ -115,6 +115,16 @@ "poor_score = 20" ] }, + { + "cell_type": "code", + "execution_count": 275, + "metadata": {}, + "outputs": [], + "source": [ + "AGREEMENT_THRESHOLD = 0.75\n", + "AGREEMENT_THRESHOLD = max(AGREEMENT_THRESHOLD, 1 - AGREEMENT_THRESHOLD)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -804,6 +814,103 @@ "raw_data.tail()" ] }, + { + "cell_type": "code", + "execution_count": 339, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['circle_name',\n", + " 'country_state',\n", + " 'lat',\n", + " 'lon',\n", + " 'count_year',\n", + " 'count_date',\n", + " 'n_field_counters',\n", + " 'n_feeder_counters',\n", + " 'min_field_parties',\n", + " 'max_field_parties',\n", + " 'field_hours',\n", + " 'feeder_hours',\n", + " 'nocturnal_hours',\n", + " 'field_distance',\n", + " 'nocturnal_distance',\n", + " 'distance_units',\n", + " 'min_temp',\n", + " 'max_temp',\n", + " 'temp_unit',\n", + " 'min_wind',\n", + " 'max_wind',\n", + " 'wind_unit',\n", + " 'min_snow',\n", + " 'max_snow',\n", + " 'snow_unit',\n", + " 'am_cloud',\n", + " 'pm_cloud',\n", + " 'field_distance_imperial',\n", + " 'field_distance_metric',\n", + " 'nocturnal_distance_imperial',\n", + " 'nocturnal_distance_metric',\n", + " 'min_snow_imperial',\n", + " 'min_snow_metric',\n", + " 'max_snow_metric',\n", + " 'max_snow_imperial',\n", + " 'min_temp_imperial',\n", + " 'max_temp_imperial',\n", + " 'min_temp_metric',\n", + " 'max_temp_metric',\n", + " 'min_wind_metric',\n", + " 'max_wind_metric',\n", + " 'min_wind_imperial',\n", + " 
'max_wind_imperial',\n", + " 'ui',\n", + " 'geohash_circle',\n", + " 'circle_id',\n", + " 'id',\n", + " 'latitude',\n", + " 'longitude',\n", + " 'elevation',\n", + " 'state',\n", + " 'name',\n", + " 'gsn_flag',\n", + " 'hcn_crn_flag',\n", + " 'wmoid',\n", + " 'geohash_station',\n", + " 'temp_min_value',\n", + " 'temp_max_value',\n", + " 'precipitation_value',\n", + " 'temp_avg',\n", + " 'snow',\n", + " 'snwd',\n", + " 'am_rain',\n", + " 'pm_rain',\n", + " 'am_snow',\n", + " 'pm_snow',\n", + " 'circle_elev',\n", + " 'elevation_source',\n", + " 'block_fips',\n", + " 'county_fips',\n", + " 'Ecosys_circle',\n", + " 'Usgsid_sys_circle',\n", + " 'Nlcd_code_circle',\n", + " 'Nlcd_circle',\n", + " 'Ecosys_station',\n", + " 'Usgsid_sys_station',\n", + " 'Nlcd_code_station',\n", + " 'Nlcd_station']" + ] + }, + "execution_count": 339, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(raw_data.columns)" + ] + }, { "cell_type": "code", "execution_count": 232, @@ -2123,16 +2230,6 @@ "circle_data = g.apply(temp_calc)" ] }, - { - "cell_type": "code", - "execution_count": 275, - "metadata": {}, - "outputs": [], - "source": [ - "AGREEMENT_THRESHOLD = 0.75\n", - "AGREEMENT_THRESHOLD = max(AGREEMENT_THRESHOLD, 1 - AGREEMENT_THRESHOLD)" - ] - }, { "cell_type": "markdown", "metadata": {},