From 6c5cb916dca3f8b644bcbd5d5ce2e75657d6d143 Mon Sep 17 00:00:00 2001 From: ren Date: Thu, 1 Oct 2020 18:40:42 -0400 Subject: [PATCH 1/2] Added code so that this analysis will mirror the work done by Hamza so the PowerPoint will be more concise --- notebooks/2.4-je-temperature-summary.ipynb | 4445 +++++++++++++++++++- 1 file changed, 4220 insertions(+), 225 deletions(-) diff --git a/notebooks/2.4-je-temperature-summary.ipynb b/notebooks/2.4-je-temperature-summary.ipynb index 9bda510..20c5f5e 100644 --- a/notebooks/2.4-je-temperature-summary.ipynb +++ b/notebooks/2.4-je-temperature-summary.ipynb @@ -4,7 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# This is The Title of Notebook\n", + "# Temperture Analysis \n", + "\n", "### Purpose\n", "This notebook will look at comparing the usability of temperature readings between Christmas Bird Count Volunteers and NOAA Weather Stations.\n", "\n", @@ -58,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 227, "metadata": {}, "outputs": [], "source": [ @@ -69,6 +70,8 @@ "from sklearn.metrics.pairwise import haversine_distances\n", "from sklearn.neighbors import DistanceMetric\n", "import plotly.graph_objects as go\n", + "from statistics import mode\n", + "import scipy\n", "\n", "#Options\n", "pd.set_option(\"display.max_columns\", 100)" @@ -90,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 228, "metadata": {}, "outputs": [], "source": [ @@ -122,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 229, "metadata": {}, "outputs": [], "source": [ @@ -132,14 +135,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 230, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\jacob\\anaconda3\\envs\\cmmdsjob\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3146: DtypeWarning: Columns (62,64,65,71,73,75,77) have mixed types.Specify dtype option on 
import or set low_memory=False.\n", + "/Users/rcdebaca/.pyenv/versions/funhacks371/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3063: DtypeWarning: Columns (62,64,65,71,73,75,77) have mixed types.Specify dtype option on import or set low_memory=False.\n", " interactivity=interactivity, compiler=compiler, result=result)\n" ] } @@ -150,7 +153,660 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 231, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
circle_namecountry_statelatloncount_yearcount_daten_field_countersn_feeder_countersmin_field_partiesmax_field_partiesfield_hoursfeeder_hoursnocturnal_hoursfield_distancenocturnal_distancedistance_unitsmin_tempmax_temptemp_unitmin_windmax_windwind_unitmin_snowmax_snowsnow_unitam_cloudpm_cloudfield_distance_imperialfield_distance_metricnocturnal_distance_imperialnocturnal_distance_metricmin_snow_imperialmin_snow_metricmax_snow_metricmax_snow_imperialmin_temp_imperialmax_temp_imperialmin_temp_metricmax_temp_metricmin_wind_metricmax_wind_metricmin_wind_imperialmax_wind_imperialuigeohash_circlecircle_ididlatitudelongitudeelevationstatenamegsn_flaghcn_crn_flagwmoidgeohash_stationtemp_min_valuetemp_max_valueprecipitation_valuetemp_avgsnowsnwdam_rainpm_rainam_snowpm_snowcircle_elevelevation_sourceblock_fipscounty_fipsEcosys_circleUsgsid_sys_circleNlcd_code_circleNlcd_circleEcosys_stationUsgsid_sys_stationNlcd_code_stationNlcd_station
756373SanningaruqUS-AK67.0833-162.966720042004-01-052.00.01.02.06.250.01.026.010.0Miles0.05.02.00.017.01.012.030.02.01.01.026.041.84100410.016.09269412.00000030.4876.2030.0000000.05.0-17.777778-15.0000000.00000027.3575800.000017.00067.0833-162.9667_2004b7ujb7ujes5USR0000AMTN67.1414-162.9944246.6AKMT. NOAK ALASKANaNb7ujNaNNaNNaNNaNNaNNaN333322.33ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
756374SanningaruqUS-AK67.1000-162.833319851985-01-016.0NaNNaNNaNNaN6.0NaNNaNNaNMiles25.032.0NaN10.025.0NaN6.036.0NaN6.06.0NaNNaNNaNNaN2.36220515.2491.4414.173228102.6115.2-3.8888890.00000016.09269440.2317356.214015.53567.1-162.8333_1985b7ujb7ujwxhUSR0000AMTN67.1414-162.9944246.6AKMT. NOAK ALASKANaNb7ujNaNNaNNaNNaNNaNNaN332242.54ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
756375SanningaruqUS-AK67.1000-162.833319861985-12-213.0NaNNaNNaNNaNNaNNaNNaNNaNMiles20.025.0NaN12.015.0NaN1.018.0NaN2.06.0NaNNaNNaNNaN0.3937012.5445.727.08661493.6102.6-6.666667-3.88888919.31123324.1390417.45689.32167.1-162.8333_1986b7ujb7ujwxhUSR0000AMTN67.1414-162.9944246.6AKMT. NOAK ALASKANaNb7ujNaNNaNNaNNaNNaNNaN332242.54ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
756376SanningaruqUS-AK67.1000-162.833319871986-12-187.0NaNNaNNaNNaNNaNNaNNaNNaNMiles28.033.0NaN10.010.0NaN18.030.0NaN7.07.0NaNNaNNaNNaN7.08661445.7276.2011.811024108.0117.0-2.2222220.55555616.09269416.0926946.21406.21467.1-162.8333_1987b7ujb7ujwxhUSR0000AMTN67.1414-162.9944246.6AKMT. NOAK ALASKANaNb7ujNaNNaNNaNNaNNaNNaN331142.54ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
756377Point BarrowUS-AK71.3333-156.666719761975-12-289.0NaNNaNNaNNaNNaNNaNNaNNaNMilesNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN71.3333-156.6667_1976bs8tbs8tfcyUSW0002750271.2833-156.78149.4AKBARROW POST ROGERS APGSN70026.0bs8t-267.0-206.03.0NaN3.0102.0NaNNaNNaNNaN0.31ghcn_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " circle_name country_state lat lon count_year count_date \\\n", + "756373 Sanningaruq US-AK 67.0833 -162.9667 2004 2004-01-05 \n", + "756374 Sanningaruq US-AK 67.1000 -162.8333 1985 1985-01-01 \n", + "756375 Sanningaruq US-AK 67.1000 -162.8333 1986 1985-12-21 \n", + "756376 Sanningaruq US-AK 67.1000 -162.8333 1987 1986-12-18 \n", + "756377 Point Barrow US-AK 71.3333 -156.6667 1976 1975-12-28 \n", + "\n", + " n_field_counters n_feeder_counters min_field_parties \\\n", + "756373 2.0 0.0 1.0 \n", + "756374 6.0 NaN NaN \n", + "756375 3.0 NaN NaN \n", + "756376 7.0 NaN NaN \n", + "756377 9.0 NaN NaN \n", + "\n", + " max_field_parties field_hours feeder_hours nocturnal_hours \\\n", + "756373 2.0 6.25 0.0 1.0 \n", + "756374 NaN NaN 6.0 NaN \n", + "756375 NaN NaN NaN NaN \n", + "756376 NaN NaN NaN NaN \n", + "756377 NaN NaN NaN NaN \n", + "\n", + " field_distance nocturnal_distance distance_units min_temp max_temp \\\n", + "756373 26.0 10.0 Miles 0.0 5.0 \n", + "756374 NaN NaN Miles 25.0 32.0 \n", + "756375 NaN NaN Miles 20.0 25.0 \n", + "756376 NaN NaN Miles 28.0 33.0 \n", + "756377 NaN NaN Miles NaN NaN \n", + "\n", + " temp_unit min_wind max_wind wind_unit min_snow max_snow \\\n", + "756373 2.0 0.0 17.0 1.0 12.0 30.0 \n", + "756374 NaN 10.0 25.0 NaN 6.0 36.0 \n", + "756375 NaN 12.0 15.0 NaN 1.0 18.0 \n", + "756376 NaN 10.0 10.0 NaN 18.0 30.0 \n", + "756377 NaN NaN NaN NaN NaN NaN \n", + "\n", + " snow_unit am_cloud pm_cloud field_distance_imperial \\\n", + "756373 2.0 1.0 1.0 26.0 \n", + "756374 NaN 6.0 6.0 NaN \n", + "756375 NaN 2.0 6.0 NaN \n", + "756376 NaN 7.0 7.0 NaN \n", + "756377 NaN NaN NaN NaN \n", + "\n", + " field_distance_metric nocturnal_distance_imperial \\\n", + "756373 41.841004 10.0 \n", + "756374 NaN NaN \n", + "756375 NaN NaN \n", + "756376 NaN NaN \n", + "756377 NaN NaN \n", + "\n", + " nocturnal_distance_metric min_snow_imperial min_snow_metric \\\n", + "756373 16.092694 12.000000 30.48 \n", + "756374 NaN 2.362205 15.24 
\n", + "756375 NaN 0.393701 2.54 \n", + "756376 NaN 7.086614 45.72 \n", + "756377 NaN NaN NaN \n", + "\n", + " max_snow_metric max_snow_imperial min_temp_imperial \\\n", + "756373 76.20 30.000000 0.0 \n", + "756374 91.44 14.173228 102.6 \n", + "756375 45.72 7.086614 93.6 \n", + "756376 76.20 11.811024 108.0 \n", + "756377 NaN NaN NaN \n", + "\n", + " max_temp_imperial min_temp_metric max_temp_metric min_wind_metric \\\n", + "756373 5.0 -17.777778 -15.000000 0.000000 \n", + "756374 115.2 -3.888889 0.000000 16.092694 \n", + "756375 102.6 -6.666667 -3.888889 19.311233 \n", + "756376 117.0 -2.222222 0.555556 16.092694 \n", + "756377 NaN NaN NaN NaN \n", + "\n", + " max_wind_metric min_wind_imperial max_wind_imperial \\\n", + "756373 27.357580 0.0000 17.000 \n", + "756374 40.231735 6.2140 15.535 \n", + "756375 24.139041 7.4568 9.321 \n", + "756376 16.092694 6.2140 6.214 \n", + "756377 NaN NaN NaN \n", + "\n", + " ui geohash_circle circle_id id latitude \\\n", + "756373 67.0833-162.9667_2004 b7uj b7ujes5 USR0000AMTN 67.1414 \n", + "756374 67.1-162.8333_1985 b7uj b7ujwxh USR0000AMTN 67.1414 \n", + "756375 67.1-162.8333_1986 b7uj b7ujwxh USR0000AMTN 67.1414 \n", + "756376 67.1-162.8333_1987 b7uj b7ujwxh USR0000AMTN 67.1414 \n", + "756377 71.3333-156.6667_1976 bs8t bs8tfcy USW00027502 71.2833 \n", + "\n", + " longitude elevation state name gsn_flag \\\n", + "756373 -162.9944 246.6 AK MT. NOAK ALASKA \n", + "756374 -162.9944 246.6 AK MT. NOAK ALASKA \n", + "756375 -162.9944 246.6 AK MT. NOAK ALASKA \n", + "756376 -162.9944 246.6 AK MT. 
NOAK ALASKA \n", + "756377 -156.7814 9.4 AK BARROW POST ROGERS AP GSN \n", + "\n", + " hcn_crn_flag wmoid geohash_station temp_min_value temp_max_value \\\n", + "756373 NaN b7uj NaN NaN \n", + "756374 NaN b7uj NaN NaN \n", + "756375 NaN b7uj NaN NaN \n", + "756376 NaN b7uj NaN NaN \n", + "756377 70026.0 bs8t -267.0 -206.0 \n", + "\n", + " precipitation_value temp_avg snow snwd am_rain pm_rain am_snow \\\n", + "756373 NaN NaN NaN NaN 3 3 3 \n", + "756374 NaN NaN NaN NaN 3 3 2 \n", + "756375 NaN NaN NaN NaN 3 3 2 \n", + "756376 NaN NaN NaN NaN 3 3 1 \n", + "756377 3.0 NaN 3.0 102.0 NaN NaN NaN \n", + "\n", + " pm_snow circle_elev elevation_source block_fips county_fips \\\n", + "756373 3 22.33 ghcn_d NaN NaN \n", + "756374 2 42.54 ghcn_d NaN NaN \n", + "756375 2 42.54 ghcn_d NaN NaN \n", + "756376 1 42.54 ghcn_d NaN NaN \n", + "756377 NaN 0.31 ghcn_d NaN NaN \n", + "\n", + " Ecosys_circle Usgsid_sys_circle Nlcd_code_circle Nlcd_circle \\\n", + "756373 NaN NaN NaN NaN \n", + "756374 NaN NaN NaN NaN \n", + "756375 NaN NaN NaN NaN \n", + "756376 NaN NaN NaN NaN \n", + "756377 NaN NaN NaN NaN \n", + "\n", + " Ecosys_station Usgsid_sys_station Nlcd_code_station Nlcd_station \n", + "756373 NaN NaN NaN NaN \n", + "756374 NaN NaN NaN NaN \n", + "756375 NaN NaN NaN NaN \n", + "756376 NaN NaN NaN NaN \n", + "756377 NaN NaN NaN NaN " + ] + }, + "execution_count": 231, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 232, "metadata": {}, "outputs": [ { @@ -174,10 +830,14 @@ " \n", " \n", " \n", + " ui\n", " count_year\n", + " count_date\n", " circle_name\n", " circle_id\n", + " Ecosys_circle\n", " specific_circle_ecosystem\n", + " Nlcd_code_circle\n", " macro_circle_ecosystem\n", " circle_elevation\n", " circle_lat\n", @@ -185,7 +845,9 @@ " circle_min_temp\n", " circle_max_temp\n", " noaa_id\n", + " Ecosys_station\n", " specific_station_ecosystem\n", + " Nlcd_code_station\n", " 
macro_station_ecosystem\n", " noaa_elevation\n", " noaa_lat\n", @@ -197,11 +859,15 @@ " \n", " \n", " 0\n", + " 19.4333-155.2833_1955\n", " 1955\n", + " 1955-01-01\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", @@ -210,6 +876,8 @@ " USC00511303\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1210.40\n", " 19.4297\n", " -155.2561\n", @@ -218,11 +886,15 @@ " \n", " \n", " 1\n", + " 19.4333-155.2833_1956\n", " 1956\n", + " 1955-12-31\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", @@ -231,6 +903,8 @@ " USC00511303\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1210.40\n", " 19.4297\n", " -155.2561\n", @@ -239,19 +913,25 @@ " \n", " \n", " 2\n", + " 19.4333-155.2833_1968\n", " 1968\n", + " 1967-12-30\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", - " 54.0\n", - " 66.0\n", + " 12.222222\n", + " 18.888889\n", " US1HIHI0013\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1059.20\n", " 19.4391\n", " -155.2156\n", @@ -260,19 +940,25 @@ " \n", " \n", " 3\n", + " 19.4333-155.2833_1968\n", " 1968\n", + " 1967-12-30\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", - " 54.0\n", - " 66.0\n", + " 12.222222\n", + " 18.888889\n", " US1HIHI0071\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1194.80\n", " 19.4414\n", " -155.2487\n", @@ -281,19 +967,25 @@ " \n", " \n", " 4\n", + " 19.4333-155.2833_1968\n", " 1968\n", + " 1967-12-30\n", " Hawai'i: Volcano N.P.\n", " 8e3wd3w\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1228.18\n", " 19.4333\n", " -155.2833\n", - " 54.0\n", - " 66.0\n", + " 12.222222\n", + " 18.888889\n", " USC00514563\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " 1079.87\n", " 19.4094\n", " -155.2608\n", @@ 
-305,12 +997,19 @@ "" ], "text/plain": [ - " count_year circle_name circle_id specific_circle_ecosystem \\\n", - "0 1955 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", - "1 1956 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", - "2 1968 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", - "3 1968 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", - "4 1968 Hawai'i: Volcano N.P. 8e3wd3w NaN \n", + " ui count_year count_date circle_name \\\n", + "0 19.4333-155.2833_1955 1955 1955-01-01 Hawai'i: Volcano N.P. \n", + "1 19.4333-155.2833_1956 1956 1955-12-31 Hawai'i: Volcano N.P. \n", + "2 19.4333-155.2833_1968 1968 1967-12-30 Hawai'i: Volcano N.P. \n", + "3 19.4333-155.2833_1968 1968 1967-12-30 Hawai'i: Volcano N.P. \n", + "4 19.4333-155.2833_1968 1968 1967-12-30 Hawai'i: Volcano N.P. \n", + "\n", + " circle_id Ecosys_circle specific_circle_ecosystem Nlcd_code_circle \\\n", + "0 8e3wd3w NaN NaN NaN \n", + "1 8e3wd3w NaN NaN NaN \n", + "2 8e3wd3w NaN NaN NaN \n", + "3 8e3wd3w NaN NaN NaN \n", + "4 8e3wd3w NaN NaN NaN \n", "\n", " macro_circle_ecosystem circle_elevation circle_lat circle_lon \\\n", "0 NaN 1228.18 19.4333 -155.2833 \n", @@ -319,47 +1018,51 @@ "3 NaN 1228.18 19.4333 -155.2833 \n", "4 NaN 1228.18 19.4333 -155.2833 \n", "\n", - " circle_min_temp circle_max_temp noaa_id specific_station_ecosystem \\\n", - "0 NaN NaN USC00511303 NaN \n", - "1 NaN NaN USC00511303 NaN \n", - "2 54.0 66.0 US1HIHI0013 NaN \n", - "3 54.0 66.0 US1HIHI0071 NaN \n", - "4 54.0 66.0 USC00514563 NaN \n", - "\n", - " macro_station_ecosystem noaa_elevation noaa_lat noaa_lon noaa_min_temp \\\n", - "0 NaN 1210.40 19.4297 -155.2561 100.0 \n", - "1 NaN 1210.40 19.4297 -155.2561 117.0 \n", - "2 NaN 1059.20 19.4391 -155.2156 NaN \n", - "3 NaN 1194.80 19.4414 -155.2487 NaN \n", - "4 NaN 1079.87 19.4094 -155.2608 NaN \n", - "\n", - " noaa_max_temp \n", - "0 161.0 \n", - "1 189.0 \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN " + " circle_min_temp circle_max_temp noaa_id Ecosys_station \\\n", + "0 NaN NaN USC00511303 NaN \n", + "1 NaN NaN 
USC00511303 NaN \n", + "2 12.222222 18.888889 US1HIHI0013 NaN \n", + "3 12.222222 18.888889 US1HIHI0071 NaN \n", + "4 12.222222 18.888889 USC00514563 NaN \n", + "\n", + " specific_station_ecosystem Nlcd_code_station macro_station_ecosystem \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " noaa_elevation noaa_lat noaa_lon noaa_min_temp noaa_max_temp \n", + "0 1210.40 19.4297 -155.2561 100.0 161.0 \n", + "1 1210.40 19.4297 -155.2561 117.0 189.0 \n", + "2 1059.20 19.4391 -155.2156 NaN NaN \n", + "3 1194.80 19.4414 -155.2487 NaN NaN \n", + "4 1079.87 19.4094 -155.2608 NaN NaN " ] }, - "execution_count": 5, + "execution_count": 232, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pulling out temperature data and renaming columns for clarification\n", - "temp_df_raw = raw_data[['count_year',\n", + "temp_df_raw = raw_data[['ui','count_year', 'count_date',\n", " 'circle_name', \n", " 'circle_id',\n", + " 'Ecosys_circle',\n", " 'Usgsid_sys_circle',\n", + " 'Nlcd_code_circle',\n", " 'Nlcd_circle',\n", " 'circle_elev',\n", " 'lat',\n", " 'lon',\n", - " 'min_temp',\n", - " 'max_temp',\n", + " 'min_temp_metric',\n", + " 'max_temp_metric',\n", " 'id',\n", + " 'Ecosys_station',\n", " 'Usgsid_sys_station',\n", + " 'Nlcd_code_station',\n", " 'Nlcd_station',\n", " 'elevation',\n", " 'latitude',\n", @@ -376,8 +1079,8 @@ " 'Nlcd_circle':'macro_circle_ecosystem',\n", " 'lat':'circle_lat',\n", " 'lon':'circle_lon',\n", - " 'min_temp':'circle_min_temp',\n", - " 'max_temp':'circle_max_temp',\n", + " 'min_temp_metric':'circle_min_temp',\n", + " 'max_temp_metric':'circle_max_temp',\n", " 'temp_unit':'circle_temp_unit',\n", " 'id':'noaa_id',\n", " 'Usgsid_sys_station':'specific_station_ecosystem',\n", @@ -394,6 +1097,24 @@ "temp_df.head()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Convert Noaa tempertures from 10th of degrees C to degrees Celecus \n", + 
"Reference: https://docs.opendata.aws/noaa-ghcn-pds/readme.html" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['noaa_min_temp'] = temp_df['noaa_min_temp'] / 10\n", + "temp_df['noaa_max_temp'] = temp_df['noaa_max_temp'] / 10" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -403,12 +1124,73 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 235, "metadata": {}, "outputs": [], "source": [ "temp_df['circle_average_temp'] = temp_df[['circle_min_temp', 'circle_max_temp']].mean(axis=1)\n", - "temp_df['noaa_average_temp'] = temp_df[['noaa_min_temp', 'noaa_max_temp']].mean(axis=1)" + "temp_df['noaa_average_temp'] = temp_df[['noaa_min_temp', 'noaa_max_temp']].mean(axis=1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Calulcate if the circle and station share ecosystem classifyers " + ] + }, + { + "cell_type": "code", + "execution_count": 236, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['same_ecosys'] = temp_df.Ecosys_circle.astype('Int64') == temp_df.Ecosys_station.astype('Int64')\n", + "temp_df['same_nlcd'] = temp_df.Nlcd_code_circle.astype('Int64') == temp_df.Nlcd_code_station.astype('Int64')" + ] + }, + { + "cell_type": "code", + "execution_count": 237, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 253761\n", + "True 155928\n", + "Name: same_ecosys, dtype: Int64" + ] + }, + "execution_count": 237, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['same_ecosys'].value_counts(dropna = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 238, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True 247630\n", + "False 166281\n", + "Name: same_nlcd, dtype: Int64" + ] + }, + "execution_count": 238, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['same_nlcd'].value_counts(dropna = True)" ] 
}, { @@ -434,7 +1216,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 239, "metadata": {}, "outputs": [], "source": [ @@ -473,7 +1255,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 240, "metadata": {}, "outputs": [], "source": [ @@ -490,7 +1272,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 241, "metadata": {}, "outputs": [], "source": [ @@ -512,6 +1294,23 @@ "## Missing Data" ] }, + { + "cell_type": "code", + "execution_count": 242, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of rows in the dataset is: 756378\n" + ] + } + ], + "source": [ + "print(f\"The number of rows in the dataset is: {temp_df.shape[0]}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -521,7 +1320,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 243, "metadata": {}, "outputs": [ { @@ -542,20 +1341,89 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Counting number of temperature measuremnts that are missing" + "#### The number of unique cbc counts in the data\n", + "A cbc count in a given year will appear multiple times in the dataset, once for each reference station it is matched with." + ] + }, + { + "cell_type": "code", + "execution_count": 244, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Number of unique cbc counts: 80765\n" + ] + } + ], + "source": [ + "print(f\" Number of unique cbc counts: {temp_df['ui'].nunique()}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The number of unique noaa stations in the data\n", + "A NOAA station used as a reference for volunteer-reported data may appear multiple times across years, because the same count is repeated each year."
+ ] + }, + { + "cell_type": "code", + "execution_count": 331, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'noaa_id'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m~/.pyenv/versions/funhacks371/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2645\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2646\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2647\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'noaa_id'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\" Number of unique noaa stations: {temp_df['noaa_id'].nunique()}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.pyenv/versions/funhacks371/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2798\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2799\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2800\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2801\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2802\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.pyenv/versions/funhacks371/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2646\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2647\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2648\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2649\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2650\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + 
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'noaa_id'" + ] + } + ], + "source": [ + "print(f\" Number of unique noaa stations: {temp_df['noaa_id'].nunique()}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Counting number of temperature measuremnts that are missing\n", + "Note:, these numbers represent repeats of the same circles multiple times " ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 246, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of missing CBC Min Temps : 26942\n", - "Number of missing CBC Max Temps : 26960\n", + "Number of missing CBC Min Temps : 26996\n", + "Number of missing CBC Max Temps : 27349\n", "Number of missing NOAA Min Temps : 675297\n", "Number of missing NOAA Max Temps : 675285\n" ] @@ -570,360 +1438,3454 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 247, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of rows missing both Min and Max Temps from CBC : 26938\n", + "Number of rows missing both Min and Max Temps from NOAA: 675076\n", + "\n", + "Number of rows missing all temperature data : 7668\n" + ] + } + ], + "source": [ + "print(f\"Number of rows missing both Min and Max Temps from CBC : {temp_df.loc[temp_df['circle_min_temp'].isna() & temp_df['circle_max_temp'].isna()].shape[0]}\")\n", + "print(f\"Number of rows missing both Min and Max Temps from NOAA: {temp_df.loc[temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'].isna()].shape[0]}\")\n", + "print()\n", + "print(f\"Number of rows missing all temperature data : 
{temp_df.loc[temp_df['circle_min_temp'].isna() & temp_df['circle_max_temp'].isna() & temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'].isna()].shape[0]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "metadata": {}, + "outputs": [], + "source": [ + "# Add this data into the dataframe \n", + "temp_df['cbc_temp_present'] = np.where(temp_df['circle_min_temp'].isna() & temp_df['circle_max_temp'], False, True)\n", + "temp_df['station_temp_present'] = np.where(temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'], False, True)\n", + "\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "-----" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Out of Bounds Data " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Temperature Data\n", + "There are a number of outliers in the data set that could highly skew analysis. Any rows with a temperature outside of a min or max recorded temperature in the United States will be dropped.\n", + "\n", + "To be conservative in data dropping we'll only using on max and one min for the entire country rather than by state or other locality. 
Additionally we'll check by each min/max temp for circles and stations to get an idea on if one is more error prone than another.\n", + "\n", + "Data: https://en.wikipedia.org/wiki/U.S._state_and_territory_temperature_extremes" + ] + }, + { + "cell_type": "code", + "execution_count": 250, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of CBC rows missing both Min and Max Temps : 26884\n", - "Number of NOAA rows missing both Min and Max Temps : 675076\n", + "Number of CBC measurments outside max : 0\n", + "Number of NOAA measurments outside max : 1\n", + "\n", + "Number of CBC measurments outside min : 0\n", + "Number of NOAA measurments outside min : 0\n", "\n", - "Number of rows missing all temperature data : 7621\n" + "Number of NOAA stations with both outside : 0\n" + ] + } + ], + "source": [ + "# Creating variables for each drop condition\n", + "circle_over_max_temp = temp_df.loc[temp_df[\"circle_max_temp\"]>max_temp_check]\n", + "circle_under_min_temp = temp_df.loc[temp_df[\"circle_min_temp\"]max_temp_check]\n", + "noaa_under_min_temp = temp_df.loc[temp_df[\"noaa_min_temp\"] max_temp_check) & (temp_df[\"noaa_min_temp\"] < min_temp_check)].shape[0]}')\n", + "\n", + "# Setting list of indices to drop\n", + "index_drop_list = list(circle_over_max_temp.index) + list(circle_under_min_temp.index) + list(noaa_over_max_temp.index) + list(noaa_under_min_temp.index)\n", + "\n", + "# Dropping All out of bout roundsRows\n", + "temp_df.drop(index_drop_list, inplace=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Distance Data" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of rows dropped outside of distance threshold: 558454\n" ] } ], "source": [ - "print(f\"Number of CBC rows missing both Min and Max Temps : {temp_df.loc[temp_df['circle_min_temp'].isna() & 
temp_df['circle_max_temp'].isna()].shape[0]}\")\n", - "print(f\"Number of NOAA rows missing both Min and Max Temps : {temp_df.loc[temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'].isna()].shape[0]}\")\n", - "print()\n", - "print(f\"Number of rows missing all temperature data : {temp_df.loc[temp_df['circle_min_temp'].isna() & temp_df['circle_max_temp'].isna() & temp_df['noaa_min_temp'].isna() & temp_df['noaa_max_temp'].isna()].shape[0]}\")" + "# Dropping rows with distance differences larger then set threshold\n", + "temp_df.drop(temp_df[temp_df['distance_diff'] > distance_threshold].index, inplace=True)\n", + "print(f'Number of rows dropped outside of distance threshold: {temp_df.shape[0]}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Elevation Data" + ] + }, + { + "cell_type": "code", + "execution_count": 252, + "metadata": {}, + "outputs": [], + "source": [ + "# Dropping rows with circles and stations that are over the elevation threshold\n", + "temp_df.drop(temp_df[temp_df['elevation_diff'] > elevation_threshold].index, inplace=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Examine the Data Remaining for Analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Assessing CBC Range" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Checking to see how many CBC Circle Min Temperatures records are within the bounds of the NOAA Station records" + ] + }, + { + "cell_type": "code", + "execution_count": 253, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['min_bw_noaa'] = np.where(temp_df['circle_min_temp'].isna(), np.NaN, temp_df['circle_min_temp'].between(temp_df['noaa_min_temp'], temp_df['noaa_max_temp']))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 254, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 375534\n", + "1.0 21404\n", + "NaN 15167\n", + "Name: min_bw_noaa, 
dtype: int64" + ] + }, + "execution_count": 254, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['min_bw_noaa'].value_counts(dropna = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 255, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of rows (circle-station pairing) where Min temperature is in the bounds of the corresponding NOAA station: 21404.0\n", + "Number of rows (circle-station pairing) where Min temperature is not in the bounds of the corresponding NOAA station: 390701.0\n", + "\n", + "5.0% of circle min temp's lay between\n" + ] + } + ], + "source": [ + "# Counting number of circles that are true\n", + "min_temp_true = temp_df['min_bw_noaa'].sum()\n", + "min_temp_false = temp_df.shape[0] - temp_df['min_bw_noaa'].sum()\n", + "print(f\"Number of rows (circle-station pairing) where Min temperature is in the bounds of the corresponding NOAA station: {temp_df['min_bw_noaa'].sum()}\")\n", + "print(f\"Number of rows (circle-station pairing) where Min temperature is not in the bounds of the corresponding NOAA station: {temp_df.shape[0] - temp_df['min_bw_noaa'].sum()}\")\n", + "print()\n", + "print(f\"{round((min_temp_true/temp_df.shape[0])*100)}% of circle min temp's lay between\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Checking to see how many CBC Circle Max Temperatures records are within the bounds of the NOAA Station records" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['max_bw_noaa'] = np.where(temp_df['circle_max_temp'].isna(), np.NaN, temp_df['circle_max_temp'].between(temp_df['noaa_min_temp'], temp_df['noaa_max_temp']))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 377489\n", + "1.0 19262\n", + "NaN 15354\n", + "Name: max_bw_noaa, dtype: 
int64" + ] + }, + "execution_count": 259, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['max_bw_noaa'].value_counts(dropna = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of rows (circle-station pairing) where temperature is in the bounds of the corresponding NOAA station: 19262.0\n", + "Number of rows (circle-station pairing) where temperature is not in the bounds of the corresponding NOAA station: 392843.0\n", + "\n", + "5.0% of stations lay between\n" + ] + } + ], + "source": [ + "# Counting number of circles that are true\n", + "max_temp_true = temp_df['max_bw_noaa'].sum()\n", + "max_temp_false = temp_df.shape[0] - sum(temp_df['max_bw_noaa'])\n", + "print(f\"Number of rows (circle-station pairing) where temperature is in the bounds of the corresponding NOAA station: {temp_df['max_bw_noaa'].sum()}\")\n", + "print(f\"Number of rows (circle-station pairing) where temperature is not in the bounds of the corresponding NOAA station: {temp_df.shape[0] - temp_df['max_bw_noaa'].sum()}\")\n", + "print()\n", + "print(f\"{round((max_temp_true/temp_df.shape[0])*100)}% of stations lay between\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## Temperature Measurement Goodness\n", + "\n", + "temp_metric = sqrt( (noaa_min_temp - circle_min_temp)^2 + (noaa_max_temp - circle_max_temp)^2 )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Goodness Metric\n", + "temp_goodness = sqrt( (noaa_min_temp - circle_min_temp)^2 + (noaa_max_temp - circle_max_temp)^2 )" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df['temp_goodness'] = round(np.sqrt(((temp_df['noaa_min_temp'] - temp_df['circle_min_temp'])**2) + ((temp_df['noaa_max_temp'] - temp_df['circle_max_temp'])**2)),2)\n", + 
"\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 34569.000000\n", + "mean 4.361014\n", + "std 4.005240\n", + "min 0.000000\n", + "25% 1.730000\n", + "50% 3.400000\n", + "75% 5.810000\n", + "max 66.660000\n", + "Name: temp_goodness, dtype: float64" + ] + }, + "execution_count": 261, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df['temp_goodness'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 262, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "temp_df['temp_goodness'].hist(bins = 20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Catagories\n", + "Values in catagories can be changed and then applied to dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "metadata": {}, + "outputs": [], + "source": [ + "# Function to assign grade scores\n", + "def assign_grade(metric_score):\n", + " if metric_score <= excellent_score:\n", + " return 'excellent'\n", + " elif metric_score <= good_score:\n", + " return 'good'\n", + " elif metric_score <= fair_score:\n", + " return 'fair'\n", + " else:\n", + " return 'poor'" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "metadata": {}, + "outputs": [], + "source": [ + "# Applying the scores\n", + "temp_df['goodness_grade'] = temp_df['temp_goodness'].apply(lambda metric_score: assign_grade(metric_score))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Group by Circles Now For Final Counts \n", + "num - The number of stations for a circle for a count date\n", + "\n", + "num_notna - The number of stations that are not null for their temp measurments\n", + "\n", + "\n", + "The next sections for \"e_\" and \"n_\" are the same as abouve, except e is only stations for a circle that\n", + "are in the same ecosystem and n is stations with the same nlcd code.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "metadata": {}, + "outputs": [], + "source": [ + "def temp_calc(dfg):\n", + " is_na_max = dfg.max_bw_noaa.isna()\n", + " is_na_min = dfg.min_bw_noaa.isna()\n", + "\n", + " # there can be weirdness with boolean not being promoted to ints, so add zero\n", + " # Count the number of stations for this circle\n", + " # Station temp present is true for is both min and max are present in the \n", + " # station data\n", + " num = 
dfg.station_temp_present.size + 0\n", + " # Count the number of stations where both temp mesurments are not NA \n", + " num_notna = dfg.station_temp_present.size + 0\n", + " # Count the number of stations where the colunteer submitted average was between the NOAA min and max\n", + " if dfg.min_bw_noaa.isnull().all():\n", + " num_min_bw_noaa = np.NaN\n", + " else:\n", + " num_min_bw_noaa = dfg.min_bw_noaa.sum()\n", + " \n", + " if dfg.max_bw_noaa.isnull().all():\n", + " num_max_bw_noaa = np.NaN\n", + " else:\n", + " num_max_bw_noaa = dfg.max_bw_noaa.sum()\n", + " \n", + " \n", + " # Number of Stations with a 'each Classifyer'\n", + " num_excellent = dfg.loc[dfg.goodness_grade == 'excellent'].shape[0]\n", + " num_good = dfg.loc[dfg.goodness_grade == 'good'].shape[0]\n", + " num_fair = dfg.loc[dfg.goodness_grade == 'fair'].shape[0]\n", + " num_poor = dfg.loc[dfg.goodness_grade == 'poor'].shape[0]\n", + " \n", + " try: \n", + " goodness_mode = mode(dfg.goodness_grade.values.tolist())\n", + " except: \n", + " goodness_mode = np.NaN\n", + " \n", + " \n", + "\n", + " \n", + " ## BREAKDOWN By Ecosys \n", + " e = dfg.loc[dfg.same_ecosys]\n", + " # Count the number of stations for this circle\n", + " e_num = e.station_temp_present.size + 0\n", + " # Count the number of stations where both temp mesurments are not NA \n", + " e_num_notna = e.station_temp_present.size + 0\n", + " # Count the number of stations where the colunteer submitted average was between the NOAA min and max\n", + " if e.min_bw_noaa.isnull().all():\n", + " e_num_min_bw_noaa = np.NaN\n", + " else:\n", + " e_num_min_bw_noaa = e.min_bw_noaa.sum()\n", + " \n", + " if e.max_bw_noaa.isnull().all():\n", + " e_num_max_bw_noaa = np.NaN\n", + " else:\n", + " e_num_max_bw_noaa = e.max_bw_noaa.sum()\n", + " \n", + " # Number of Stations with a 'each Classifyer'\n", + " e_num_excellent = e.loc[dfg.goodness_grade == 'excellent'].shape[0]\n", + " e_num_good = e.loc[e.goodness_grade == 'good'].shape[0]\n", + " e_num_fair 
= e.loc[e.goodness_grade == 'fair'].shape[0]\n", + " e_num_poor = e.loc[e.goodness_grade == 'poor'].shape[0]\n", + " \n", + " try: \n", + " e_goodness_mode = mode(e.goodness_grade.values.tolist())\n", + " except: \n", + " e_goodness_mode = np.NaN\n", + " \n", + " \n", + " ## BREAKDOWN By NCLD \n", + " n = dfg.loc[dfg.same_nlcd]\n", + " # Count the number of stations for this circle\n", + " n_num = n.station_temp_present.size + 0\n", + " # Count the number of stations where both temp mesurments are not NA \n", + " n_num_notna = n.station_temp_present.size + 0\n", + " # Count the number of stations where the colunteer submitted average was between the NOAA min and max\n", + " if n.min_bw_noaa.isnull().all():\n", + " n_num_min_bw_noaa = np.NaN\n", + " else:\n", + " n_num_min_bw_noaa = n.min_bw_noaa.sum()\n", + " \n", + " if n.max_bw_noaa.isnull().all():\n", + " n_num_max_bw_noaa = np.NaN\n", + " else:\n", + " n_num_max_bw_noaa = n.max_bw_noaa.sum()\n", + " \n", + " # Number of Stations with a 'each Classifyer'\n", + " n_num_excellent = n.loc[dfg.goodness_grade == 'excellent'].shape[0]\n", + " n_num_good = n.loc[n.goodness_grade == 'good'].shape[0]\n", + " n_num_fair = n.loc[n.goodness_grade == 'fair'].shape[0]\n", + " n_num_poor = n.loc[n.goodness_grade == 'poor'].shape[0]\n", + " \n", + " try: \n", + " n_goodness_mode = mode(n.goodness_grade.values.tolist())\n", + " except: \n", + " n_goodness_mode = np.NaN\n", + " \n", + "# # havent figured out how to inlinse this yet ...\n", + " lowest_id = dfg.elevation_diff.idxmin() if (num > 0) else np.NaN\n", + "\n", + " try:\n", + " lowest_id_na = dfg.loc[~is_na_max, 'elevation_diff'].idxmin() if ((num_notna > 0) and dfg.loc[~is_na_max, 'elevation_diff'].shape[0] > 0) else np.NaN\n", + " except:\n", + " lowest_id_na = np.NaN\n", + " \n", + " \n", + " \n", + " return pd.Series({\n", + " 'num' : num,\n", + " 'num_notna' : num_notna,\n", + " 'num_min_bw_noaa' : num_min_bw_noaa,\n", + " 'num_max_bw_noaa' : num_max_bw_noaa,\n", + " 
'num_excellent' : num_excellent,\n", + " 'num_good' : num_good,\n", + " 'num_fair' : num_fair,\n", + " 'num_poor' : num_poor,\n", + " 'goodness_mode' : goodness_mode,\n", + " \n", + " 'e_num' : e_num,\n", + " 'e_num_notna' : e_num_notna,\n", + " 'e_num_min_bw_noaa' : e_num_min_bw_noaa,\n", + " 'e_num_max_bw_noaa' : e_num_max_bw_noaa,\n", + " 'e_num_excellent' : e_num_excellent,\n", + " 'e_num_good' : e_num_good,\n", + " 'e_num_fair' : e_num_fair,\n", + " 'e_num_poor' : e_num_poor,\n", + " 'e_goodness_mode' : e_goodness_mode,\n", + " \n", + " \n", + " 'n_num' : n_num,\n", + " 'n_num_notna' : n_num_notna,\n", + " 'n_num_min_bw_noaa' : n_num_min_bw_noaa,\n", + " 'n_num_max_bw_noaa' : n_num_max_bw_noaa,\n", + " 'n_num_excellent' : n_num_excellent,\n", + " 'n_num_good' : n_num_good,\n", + " 'n_num_fair' : n_num_fair,\n", + " 'n_num_poor' : n_num_poor,\n", + " 'n_goodness_mode' : n_goodness_mode,\n", + " \n", + "\n", + " 'p' : num_max_bw_noaa / num_notna if (num_notna > 0) else np.NaN,\n", + " 'e_p' : e_num_max_bw_noaa / e_num_notna if (e_num_notna > 0) else np.NaN,\n", + " 'n_p' : n_num_max_bw_noaa / n_num_notna if (n_num_notna > 0) else np.NaN,\n", + " \n", + " \n", + " # Goodness Closest \n", + " 'goodness_closest' : dfg.loc[dfg.distance_diff.idxmin(), 'goodness_grade'] if (num > 0) else pd.NA,\n", + " \n", + " # Max Closest\n", + " 'max_temp_closest' : dfg.loc[dfg.distance_diff.idxmin(), 'max_bw_noaa'] if (num > 0) else pd.NA,\n", + " 'max_temp_closest_value' : dfg.loc[dfg.distance_diff.idxmin(), 'noaa_max_temp'] if (num > 0) else pd.NA,\n", + " \n", + " 'max_temp_closest_notna' : dfg.loc[dfg.loc[~is_na_max, 'distance_diff'].idxmin(), 'max_bw_noaa'] if (num_notna > 0 and num_max_bw_noaa > 0 ) else pd.NA,\n", + " \n", + " # Min Closest\n", + " 'min_temp_closest' : dfg.loc[dfg.distance_diff.idxmin(), 'min_bw_noaa'] if (num > 0) else pd.NA,\n", + " 'min_temp_closest_value' : dfg.loc[dfg.distance_diff.idxmin(), 'noaa_min_temp'] if (num > 0) else pd.NA,\n", + " \n", + " 
'min_temp_closest_notna' : dfg.loc[dfg.loc[~is_na_min, 'distance_diff'].idxmin(), 'min_bw_noaa'] if (num_notna > 0 and num_min_bw_noaa > 0 ) else pd.NA,\n", + " \n", + " #Goodness Lowest\n", + " 'goodness_lowest' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'goodness_grade'],\n", + " \n", + " # Max Lowest \n", + " 'max_temp_lowest' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'max_bw_noaa'],\n", + " 'max_temp_lowest_value' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'noaa_max_temp'],\n", + " 'max_temp_lowest_notna' : pd.NA if pd.isna(lowest_id_na) else dfg.loc[lowest_id_na, 'max_bw_noaa'],\n", + " \n", + " # Min Losest\n", + " 'min_temp_lowest' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'min_bw_noaa'],\n", + " 'min_temp_lowest_value' : pd.NA if pd.isna(lowest_id) else dfg.loc[lowest_id, 'noaa_min_temp'],\n", + " 'min_temp_lowest_notna' : pd.NA if pd.isna(lowest_id_na) else dfg.loc[lowest_id_na, 'min_bw_noaa'],\n", + " })\n" + ] + }, + { + "cell_type": "code", + "execution_count": 266, + "metadata": {}, + "outputs": [], + "source": [ + "temp_df = temp_df.set_index(['circle_id', 'count_date', 'noaa_id']).sort_index()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "metadata": {}, + "outputs": [], + "source": [ + "g = temp_df.groupby(level=['circle_id', 'count_date'])" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 268, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The new fields used for Analysis\n", + "num - The number of stations for a circle for a count date \n", + "\n", + "num_notna - The number of stations that are not null for their max temp measurment \n", + "\n", + "num_min_bw_noaa, num_max_bw_noaa - The number of reference stations that bount the circle\n", + "reported min 
temp and max temp, respectively. \n", + "\n", + "num_excellent, num_good, num_fair, num_poor - The number of stations whose goodness \n", + "measure was in the categories excellent, good, fair, and poor, respectively \n", + "\n", + "goodness_mode - The goodness metric category that appears the most often for a circle \n", + "\n", + "num_min_bw_noaa - The number of stations where the CBC Min was within NOAA bounds \n", + "num_max_bw_noaa - The number of stations where the CBC Max was within NOAA bounds\n", + "\n", + "The next sections for \"e_\" and \"n_\" are the same as above, except e is only stations for a circle that\n", + "are in the same ecosystem and n is stations with the same nlcd code.\n", + "\n", + "p - The proportion of stations that reported a max temp within bounds for a circle\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 274, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute the fields used for Analysis\n", + "circle_data = g.apply(temp_calc)" + ] + }, + { + "cell_type": "code", + "execution_count": 275, + "metadata": {}, + "outputs": [], + "source": [ + "AGREEMENT_THRESHOLD = 0.75\n", + "AGREEMENT_THRESHOLD = max(AGREEMENT_THRESHOLD, 1 - AGREEMENT_THRESHOLD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Determine if there is Consensus among a Circle's Reference Stations\n", + "Using the AGREEMENT_THRESHOLD set in the cell above, determine whether enough of a circle's stations\n", + "agree that the circle's max temp was within the station bounds, based on that agreement threshold.\n", + "\n", + "Do this for all stations and stations within the same ecosys and nlcd code " + ] + }, + { + "cell_type": "code", + "execution_count": 276, + "metadata": {}, + "outputs": [], + "source": [ + "# if the stations couldnt agree (p ∈ [1-AGREEMENT_THRESHOLD, AGREEMENT_THRESHOLD]) or p is NaN, set the consenus to NaN as well\n", + "circle_data['consensus'] = np.where(((circle_data.p >= (1-AGREEMENT_THRESHOLD)) & (circle_data.p <= 
AGREEMENT_THRESHOLD)) | circle_data.p.isna(), \n", + " pd.NA, circle_data.p >= AGREEMENT_THRESHOLD)\n", + "\n", + "circle_data['e_consensus'] = np.where(((circle_data.e_p >= (1-AGREEMENT_THRESHOLD)) & (circle_data.e_p <= AGREEMENT_THRESHOLD)) | circle_data.e_p.isna(), \n", + " pd.NA, circle_data.e_p >= AGREEMENT_THRESHOLD)\n", + "\n", + "circle_data['n_consensus'] = np.where(((circle_data.n_p >= (1-AGREEMENT_THRESHOLD)) & (circle_data.n_p <= AGREEMENT_THRESHOLD)) | circle_data.n_p.isna(), \n", + " pd.NA, circle_data.n_p >= AGREEMENT_THRESHOLD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 277, + "metadata": {}, + "outputs": [], + "source": [ + "for c in ['consensus', 'e_consensus', 'n_consensus','max_temp_closest', 'max_temp_closest_notna', 'max_temp_lowest', 'max_temp_lowest_notna']:\n", + " circle_data[c] = circle_data[c].astype('boolean')\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 278, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uicount_yearcircle_nameEcosys_circlespecific_circle_ecosystemNlcd_code_circlemacro_circle_ecosystemcircle_elevationcircle_latcircle_loncircle_min_tempcircle_max_tempEcosys_stationspecific_station_ecosystemNlcd_code_stationmacro_station_ecosystemnoaa_elevationnoaa_latnoaa_lonnoaa_min_tempnoaa_max_tempcircle_average_tempnoaa_average_tempsame_ecosyssame_nlcddistance_diffelevation_diffcbc_temp_presentstation_temp_presentmin_bw_noaamax_bw_noaatemp_goodnessgoodness_grade
circle_idcount_datenoaa_id
87ymwsn1968-12-26USC0051913022.0833-159.6667_19691969Kaua'i: WaimeaNaNNaNNaNNaN1015.7422.0833-159.666720.00000025.555556NaNNaNNaNNaN1051.922.1167-159.6167NaNNaN22.777778NaN<NA><NA>6352.27864436.16TrueTrue0.00.0NaNpoor
87ysunb1971-12-27US1HIKI000421.9667-159.4333_19721972Kaua'i: Lihu'eNaNNaNNaNNaN142.9621.9667-159.433323.33333325.555556NaNNaNNaNNaN186.821.9249-159.4983NaNNaN24.444444NaN<NA><NA>8159.88975343.84TrueTrue0.00.0NaNpoor
US1HIKI001421.9667-159.4333_19721972Kaua'i: Lihu'eNaNNaNNaNNaN142.9621.9667-159.433323.33333325.555556NaNNaNNaNNaN131.721.9055-159.5150NaNNaN24.444444NaN<NA><NA>10834.61410711.26TrueTrue0.00.0NaNpoor
US1HIKI001821.9667-159.4333_19721972Kaua'i: Lihu'eNaNNaNNaNNaN142.9621.9667-159.433323.33333325.555556NaNNaNNaNNaN160.321.9075-159.5181NaNNaN24.444444NaN<NA><NA>10950.02715117.34TrueTrue0.00.0NaNpoor
USC0051000621.9667-159.4333_19721972Kaua'i: Lihu'eNaNNaNNaNNaN142.9621.9667-159.433323.33333325.555556NaNNaNNaNNaN107.021.9500-159.4333NaNNaN24.444444NaN<NA><NA>1857.47992135.96TrueTrue0.00.0NaNpoor
\n", + "
" + ], + "text/plain": [ + " ui count_year \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 22.0833-159.6667_1969 1969 \n", + "87ysunb 1971-12-27 US1HIKI0004 21.9667-159.4333_1972 1972 \n", + " US1HIKI0014 21.9667-159.4333_1972 1972 \n", + " US1HIKI0018 21.9667-159.4333_1972 1972 \n", + " USC00510006 21.9667-159.4333_1972 1972 \n", + "\n", + " circle_name Ecosys_circle \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 Kaua'i: Waimea NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 Kaua'i: Lihu'e NaN \n", + " US1HIKI0014 Kaua'i: Lihu'e NaN \n", + " US1HIKI0018 Kaua'i: Lihu'e NaN \n", + " USC00510006 Kaua'i: Lihu'e NaN \n", + "\n", + " specific_circle_ecosystem Nlcd_code_circle \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN NaN \n", + " US1HIKI0014 NaN NaN \n", + " US1HIKI0018 NaN NaN \n", + " USC00510006 NaN NaN \n", + "\n", + " macro_circle_ecosystem circle_elevation \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN 1015.74 \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN 142.96 \n", + " US1HIKI0014 NaN 142.96 \n", + " US1HIKI0018 NaN 142.96 \n", + " USC00510006 NaN 142.96 \n", + "\n", + " circle_lat circle_lon circle_min_temp \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 22.0833 -159.6667 20.000000 \n", + "87ysunb 1971-12-27 US1HIKI0004 21.9667 -159.4333 23.333333 \n", + " US1HIKI0014 21.9667 -159.4333 23.333333 \n", + " US1HIKI0018 21.9667 -159.4333 23.333333 \n", + " USC00510006 21.9667 -159.4333 23.333333 \n", + "\n", + " circle_max_temp Ecosys_station \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 25.555556 NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 25.555556 NaN \n", + " US1HIKI0014 25.555556 NaN \n", + " US1HIKI0018 25.555556 NaN \n", + " USC00510006 25.555556 NaN \n", + "\n", + " specific_station_ecosystem \\\n", + "circle_id count_date 
noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN \n", + " US1HIKI0014 NaN \n", + " US1HIKI0018 NaN \n", + " USC00510006 NaN \n", + "\n", + " Nlcd_code_station macro_station_ecosystem \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN NaN \n", + " US1HIKI0014 NaN NaN \n", + " US1HIKI0018 NaN NaN \n", + " USC00510006 NaN NaN \n", + "\n", + " noaa_elevation noaa_lat noaa_lon \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 1051.9 22.1167 -159.6167 \n", + "87ysunb 1971-12-27 US1HIKI0004 186.8 21.9249 -159.4983 \n", + " US1HIKI0014 131.7 21.9055 -159.5150 \n", + " US1HIKI0018 160.3 21.9075 -159.5181 \n", + " USC00510006 107.0 21.9500 -159.4333 \n", + "\n", + " noaa_min_temp noaa_max_temp \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 NaN NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 NaN NaN \n", + " US1HIKI0014 NaN NaN \n", + " US1HIKI0018 NaN NaN \n", + " USC00510006 NaN NaN \n", + "\n", + " circle_average_temp noaa_average_temp \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 22.777778 NaN \n", + "87ysunb 1971-12-27 US1HIKI0004 24.444444 NaN \n", + " US1HIKI0014 24.444444 NaN \n", + " US1HIKI0018 24.444444 NaN \n", + " USC00510006 24.444444 NaN \n", + "\n", + " same_ecosys same_nlcd distance_diff \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 6352.278644 \n", + "87ysunb 1971-12-27 US1HIKI0004 8159.889753 \n", + " US1HIKI0014 10834.614107 \n", + " US1HIKI0018 10950.027151 \n", + " USC00510006 1857.479921 \n", + "\n", + " elevation_diff cbc_temp_present \\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 36.16 True \n", + "87ysunb 1971-12-27 US1HIKI0004 43.84 True \n", + " US1HIKI0014 11.26 True \n", + " US1HIKI0018 17.34 True \n", + " USC00510006 35.96 True \n", + "\n", + " station_temp_present min_bw_noaa 
\\\n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 True 0.0 \n", + "87ysunb 1971-12-27 US1HIKI0004 True 0.0 \n", + " US1HIKI0014 True 0.0 \n", + " US1HIKI0018 True 0.0 \n", + " USC00510006 True 0.0 \n", + "\n", + " max_bw_noaa temp_goodness goodness_grade \n", + "circle_id count_date noaa_id \n", + "87ymwsn 1968-12-26 USC00519130 0.0 NaN poor \n", + "87ysunb 1971-12-27 US1HIKI0004 0.0 NaN poor \n", + " US1HIKI0014 0.0 NaN poor \n", + " US1HIKI0018 0.0 NaN poor \n", + " USC00510006 0.0 NaN poor " + ] + }, + "execution_count": 278, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Merge is the circle recorded data with the summary dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 279, + "metadata": {}, + "outputs": [], + "source": [ + "circle_obs = temp_df[['ui','circle_min_temp', 'circle_max_temp', 'circle_average_temp', 'Ecosys_station' ,'Nlcd_code_station']].groupby(['circle_id', 'count_date']).agg('first')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 280, + "metadata": {}, + "outputs": [], + "source": [ + "circle_data = circle_obs.join(circle_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 281, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uicircle_min_tempcircle_max_tempcircle_average_tempEcosys_stationNlcd_code_stationnumnum_notnanum_min_bw_noaanum_max_bw_noaanum_excellentnum_goodnum_fairnum_poorgoodness_modee_nume_num_notnae_num_min_bw_noaae_num_max_bw_noaae_num_excellente_num_goode_num_faire_num_poore_goodness_moden_numn_num_notnan_num_min_bw_noaan_num_max_bw_noaan_num_excellentn_num_goodn_num_fairn_num_poorn_goodness_modepe_pn_pgoodness_closestmax_temp_closestmax_temp_closest_valuemax_temp_closest_notnamin_temp_closestmin_temp_closest_valuemin_temp_closest_notnagoodness_lowestmax_temp_lowestmax_temp_lowest_valuemax_temp_lowest_notnamin_temp_lowestmin_temp_lowest_valuemin_temp_lowest_notnaconsensuse_consensusn_consensus
circle_idcount_date
87ymwsn1968-12-2622.0833-159.6667_196920.00000025.55555622.777778NaNNaN110.00.00001poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
87ysunb1971-12-2721.9667-159.4333_197223.33333325.55555624.444444NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1972-12-1721.9667-159.4333_197322.22222225.55555623.888889NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1973-12-1621.9667-159.4333_197417.22222227.77777822.500000NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1974-12-1521.9667-159.4333_197522.22222227.77777825.000000NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
\n", + "
" + ], + "text/plain": [ + " ui circle_min_temp circle_max_temp \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 22.0833-159.6667_1969 20.000000 25.555556 \n", + "87ysunb 1971-12-27 21.9667-159.4333_1972 23.333333 25.555556 \n", + " 1972-12-17 21.9667-159.4333_1973 22.222222 25.555556 \n", + " 1973-12-16 21.9667-159.4333_1974 17.222222 27.777778 \n", + " 1974-12-15 21.9667-159.4333_1975 22.222222 27.777778 \n", + "\n", + " circle_average_temp Ecosys_station Nlcd_code_station \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 22.777778 NaN NaN \n", + "87ysunb 1971-12-27 24.444444 NaN NaN \n", + " 1972-12-17 23.888889 NaN NaN \n", + " 1973-12-16 22.500000 NaN NaN \n", + " 1974-12-15 25.000000 NaN NaN \n", + "\n", + " num num_notna num_min_bw_noaa num_max_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 1 1 0.0 0.0 \n", + "87ysunb 1971-12-27 13 13 0.0 0.0 \n", + " 1972-12-17 13 13 0.0 0.0 \n", + " 1973-12-16 13 13 0.0 0.0 \n", + " 1974-12-15 13 13 0.0 0.0 \n", + "\n", + " num_excellent num_good num_fair num_poor \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 0 1 \n", + "87ysunb 1971-12-27 0 0 0 13 \n", + " 1972-12-17 0 0 0 13 \n", + " 1973-12-16 0 0 0 13 \n", + " 1974-12-15 0 0 0 13 \n", + "\n", + " goodness_mode e_num e_num_notna e_num_min_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 poor 0 0 NaN \n", + "87ysunb 1971-12-27 poor 0 0 NaN \n", + " 1972-12-17 poor 0 0 NaN \n", + " 1973-12-16 poor 0 0 NaN \n", + " 1974-12-15 poor 0 0 NaN \n", + "\n", + " e_num_max_bw_noaa e_num_excellent e_num_good \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0 0 \n", + "87ysunb 1971-12-27 NaN 0 0 \n", + " 1972-12-17 NaN 0 0 \n", + " 1973-12-16 NaN 0 0 \n", + " 1974-12-15 NaN 0 0 \n", + "\n", + " e_num_fair e_num_poor e_goodness_mode n_num \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 NaN 0 \n", + "87ysunb 1971-12-27 0 0 NaN 0 \n", + " 1972-12-17 0 0 NaN 0 \n", + " 1973-12-16 0 0 NaN 0 
\n", + " 1974-12-15 0 0 NaN 0 \n", + "\n", + " n_num_notna n_num_min_bw_noaa n_num_max_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 NaN NaN \n", + "87ysunb 1971-12-27 0 NaN NaN \n", + " 1972-12-17 0 NaN NaN \n", + " 1973-12-16 0 NaN NaN \n", + " 1974-12-15 0 NaN NaN \n", + "\n", + " n_num_excellent n_num_good n_num_fair n_num_poor \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 0 0 \n", + "87ysunb 1971-12-27 0 0 0 0 \n", + " 1972-12-17 0 0 0 0 \n", + " 1973-12-16 0 0 0 0 \n", + " 1974-12-15 0 0 0 0 \n", + "\n", + " n_goodness_mode p e_p n_p goodness_closest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0.0 NaN NaN poor \n", + "87ysunb 1971-12-27 NaN 0.0 NaN NaN poor \n", + " 1972-12-17 NaN 0.0 NaN NaN poor \n", + " 1973-12-16 NaN 0.0 NaN NaN poor \n", + " 1974-12-15 NaN 0.0 NaN NaN poor \n", + "\n", + " max_temp_closest max_temp_closest_value \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 False NaN \n", + "87ysunb 1971-12-27 False NaN \n", + " 1972-12-17 False NaN \n", + " 1973-12-16 False NaN \n", + " 1974-12-15 False NaN \n", + "\n", + " max_temp_closest_notna min_temp_closest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0.0 \n", + "87ysunb 1971-12-27 0.0 \n", + " 1972-12-17 0.0 \n", + " 1973-12-16 0.0 \n", + " 1974-12-15 0.0 \n", + "\n", + " min_temp_closest_value min_temp_closest_notna \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN \n", + "87ysunb 1971-12-27 NaN \n", + " 1972-12-17 NaN \n", + " 1973-12-16 NaN \n", + " 1974-12-15 NaN \n", + "\n", + " goodness_lowest max_temp_lowest max_temp_lowest_value \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 poor False NaN \n", + "87ysunb 1971-12-27 poor False NaN \n", + " 1972-12-17 poor False NaN \n", + " 1973-12-16 poor False NaN \n", + " 1974-12-15 poor False NaN \n", + "\n", + " max_temp_lowest_notna min_temp_lowest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 False 0 \n", + "87ysunb 1971-12-27 False 0 
\n", + " 1972-12-17 False 0 \n", + " 1973-12-16 False 0 \n", + " 1974-12-15 False 0 \n", + "\n", + " min_temp_lowest_value min_temp_lowest_notna consensus \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0 False \n", + "87ysunb 1971-12-27 NaN 0 False \n", + " 1972-12-17 NaN 0 False \n", + " 1973-12-16 NaN 0 False \n", + " 1974-12-15 NaN 0 False \n", + "\n", + " e_consensus n_consensus \n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 \n", + "87ysunb 1971-12-27 \n", + " 1972-12-17 \n", + " 1973-12-16 \n", + " 1974-12-15 " + ] + }, + "execution_count": 281, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circle_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 282, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "68390" + ] + }, + "execution_count": 282, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Count the number of circles we have \n", + "circle_data['ui'].nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The number of stations per circle" + ] + }, + { + "cell_type": "code", + "execution_count": 283, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 68390.000000\n", + "mean 6.025808\n", + "std 7.684488\n", + "min 1.000000\n", + "25% 1.000000\n", + "50% 3.000000\n", + "75% 8.000000\n", + "max 101.000000\n", + "Name: num, dtype: float64" + ] + }, + "execution_count": 283, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circle_data.num.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Missing Station and Circle Data" + ] + }, + { + "cell_type": "code", + "execution_count": 284, + "metadata": {}, + "outputs": [], + "source": [ + "# Create an list of indexes with missing min OR max temp\n", + "circle_data_na_circle_idx = (circle_data.circle_min_temp.isna() | circle_data.circle_max_temp.isna())\n" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 285, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles with missing Max or Min Temp Data: \n", + "10925\n", + "Circles with NO missing Temp Data: \n", + "57465\n" + ] + } + ], + "source": [ + "print(\"Circles with missing Max or Min Temp Data: \")\n", + "print(circle_data_na_circle_idx.sum())\n", + "print(\"Circles with NO missing Temp Data: \")\n", + "print(len(circle_data) - circle_data_na_circle_idx.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 286, + "metadata": {}, + "outputs": [], + "source": [ + "# Quick Look at Missing Max Temp\n", + "circle_data_maxna_circle_idx = circle_data.circle_max_temp.isna()" + ] + }, + { + "cell_type": "code", + "execution_count": 287, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles with missing MAX Temp Data: \n", + "10919\n", + "Circles with NO missing MAX Temp Data: \n", + "57471\n" + ] + } + ], + "source": [ + "print(\"Circles with missing MAX Temp Data: \")\n", + "print(circle_data_maxna_circle_idx.sum())\n", + "print(\"Circles with NO missing MAX Temp Data: \")\n", + "print(len(circle_data) - circle_data_maxna_circle_idx.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 288, + "metadata": {}, + "outputs": [], + "source": [ + "# Quick Look at Missing Min Temp\n", + "circle_data_minna_circle_idx = circle_data.circle_min_temp.isna()" + ] + }, + { + "cell_type": "code", + "execution_count": 289, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles with missing Min Temp Data: \n", + "10895\n", + "Circles with NO missing Min Temp Data: \n", + "57495\n" + ] + } + ], + "source": [ + "print(\"Circles with missing Min Temp Data: \")\n", + "print(circle_data_minna_circle_idx.sum())\n", + "print(\"Circles with NO missing Min Temp Data: \")\n", + "print(len(circle_data) - 
circle_data_minna_circle_idx.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 290, + "metadata": {}, + "outputs": [], + "source": [ + "# Quick Look at Missing BOTH min and max temp\n", + "circle_data_bothna_circle_idx = (circle_data.circle_min_temp.isna() & circle_data.circle_max_temp.isna())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 291, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles with missing Temp Data: \n", + "10889\n", + "Circles with NO missing Temp Data: \n", + "57501\n" + ] + } + ], + "source": [ + "print(\"Circles with missing Temp Data: \")\n", + "print(circle_data_bothna_circle_idx.sum())\n", + "print(\"Circles with NO missing Temp Data: \")\n", + "print(len(circle_data) - circle_data_bothna_circle_idx.sum())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Count and Percent of Stations Reporting if the Circle Reported Temp Data" + ] + }, + { + "cell_type": "code", + "execution_count": 292, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Stations with missing Temp Data IF the the Circle is Reporting:\n", + "0\n", + "0.0\n", + "Stations with NO missing Temp Data IF the Circle is Reporting: \n", + "57465\n", + "100.0\n" + ] + } + ], + "source": [ + "print(\"Stations with missing Temp Data IF the the Circle is Reporting:\")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.num_notna == 0)].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.num_notna == 0)].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n", + "print(\"Stations with NO missing Temp Data IF the Circle is Reporting: \")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.num_notna != 0)].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.num_notna != 0)].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 
100 )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Count and Percent of Closest Stations Reporting if the Circle Reported Temp Data" + ] + }, + { + "cell_type": "code", + "execution_count": 293, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Closest Stations with missing Max Temp Data IF the the Circle is Reporting:\n", + "0\n", + "0.0\n", + "Closest Stations with NO missing Max Temp Data IF the Circle is Reporting: \n", + "57465\n", + "100.0\n" + ] + } + ], + "source": [ + "print(\"Closest Stations with missing Max Temp Data IF the the Circle is Reporting:\")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.max_temp_closest.isna())].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.max_temp_closest.isna())].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n", + "print(\"Closest Stations with NO missing Max Temp Data IF the Circle is Reporting: \")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (~circle_data.max_temp_closest.isna())].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (~circle_data.max_temp_closest.isna())].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Count and Percent of Closest Elevation Stations Reporting if the Circle Reported Temp Data" + ] + }, + { + "cell_type": "code", + "execution_count": 294, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Closest Elivation Stations with missing Max Temp Data IF the the Circle is Reporting:\n", + "241\n", + "0.41938571304272165\n", + "Closest Elivation Stations with NO missing Max Temp Data IF the Circle is Reporting: \n", + "57224\n", + "99.58061428695729\n" + ] + } + ], + "source": [ + "print(\"Closest Elivation Stations with missing Max Temp Data IF the the 
Circle is Reporting:\")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.max_temp_lowest.isna())].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (circle_data.max_temp_lowest.isna())].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n", + "print(\"Closest Elivation Stations with NO missing Max Temp Data IF the Circle is Reporting: \")\n", + "print(circle_data[(~circle_data_na_circle_idx) & (~circle_data.max_temp_lowest.isna())].shape[0])\n", + "print(circle_data[(~circle_data_na_circle_idx) & (~circle_data.max_temp_lowest.isna())].shape[0] / len(circle_data[~circle_data_na_circle_idx]) * 100 )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Circle v Station: The Final Determination on If Volunteers are Reliable" + ] + }, + { + "cell_type": "code", + "execution_count": 295, + "metadata": {}, + "outputs": [], + "source": [ + "def crosstab_metrics(actl):\n", + " # accuracy\n", + " acc = actl.to_numpy().diagonal().sum() / actl.to_numpy().sum()\n", + " # precision\n", + " pr = actl.loc[1,1] / actl.to_numpy()[[1,0], [1,1]].sum()\n", + " # recall\n", + " re = actl.loc[1,1] / actl.to_numpy()[[1,1], [1,0]].sum()\n", + " # F1\n", + " f1 = 2 * pr * re / (pr + re)\n", + " \n", + " print(f\"accuracy: {acc*100:.2f}%\")\n", + " print(f\"precision: {pr*100:.2f}%\")\n", + " print(f\"recall: {re*100:.2f}%\")\n", + " print(f\"F1: {f1*100:.2f}%\")\n", + " \n", + "# return (acc, pr, re, f1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Based on All Reference Stations for a CBC Circle" + ] + }, + { + "cell_type": "code", + "execution_count": 297, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cicles where the circle Min temp is in bounds for NO references stations:\n", + "39160\n", + "0.572598333089633\n", + "Cicles where the circle Min temp is in bounds for 0 to 50% matching references stations\n", + "14132\n", + 
"20.663839742652435\n", + "Cicles where the circle Min temp is in bounds for 50% to All matching references stations\n", + "2502\n", + "3.658429594970025\n", + "Cicles where the circle Min temp is in bounds for ALL references stations: \n", + "1701\n", + "0.024872057318321393\n" + ] + } + ], + "source": [ + "# Check that the Minimum and Maximun Values are within the NOAA bounds\n", + "\n", + "print(\"Cicles where the circle Min temp is in bounds for NO references stations:\")\n", + "print(((circle_data.num_min_bw_noaa / circle_data.num) == 0).sum())\n", + "print(((circle_data.num_min_bw_noaa / circle_data.num) == 0).sum() / circle_data.shape[0] * 1)\n", + "print(\"Cicles where the circle Min temp is in bounds for 0 to 50% matching references stations\")\n", + "print((((circle_data.num_min_bw_noaa / circle_data.num) > 0) & ((circle_data.num_min_bw_noaa / circle_data.num) < .50)).sum())\n", + "print((((circle_data.num_min_bw_noaa / circle_data.num) > 0) & ((circle_data.num_min_bw_noaa / circle_data.num) < .50)).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle Min temp is in bounds for 50% to All matching references stations\")\n", + "print((((circle_data.num_min_bw_noaa / circle_data.num) < 1) & ((circle_data.num_min_bw_noaa / circle_data.num) >= .50)).sum())\n", + "print((((circle_data.num_min_bw_noaa / circle_data.num) < 1) & ((circle_data.num_min_bw_noaa / circle_data.num) >= .50)).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle Min temp is in bounds for ALL references stations: \")\n", + "print(((circle_data.num_min_bw_noaa / circle_data.num) == 1).sum())\n", + "print(((circle_data.num_min_bw_noaa / circle_data.num) == 1).sum() / circle_data.shape[0] * 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 300, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Circles where the circle max temp is in bounds for NO references stations:\n", + "40798\n", + 
"59.654920309986835\n", + "Cicles where the circle max temp is in bounds for 0 to 50% matching references stations\n", + "13008\n", + "19.020324608860946\n", + "Cicles where the circle max temp is in bounds for 50% to All matching references stations\n", + "2163\n", + "3.1627430910951895\n", + "Cicles where the circle max temp is in bounds for ALL references stations: \n", + "1502\n", + "2.1962275186430764\n" + ] + } + ], + "source": [ + "# Check that the maximum and Maximun Values are within the NOAA bounds\n", + "\n", + "print(\"Circles where the circle max temp is in bounds for NO references stations:\")\n", + "print(((circle_data.num_max_bw_noaa / circle_data.num) == 0).sum())\n", + "print(((circle_data.num_max_bw_noaa / circle_data.num) == 0).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle max temp is in bounds for 0 to 50% matching references stations\")\n", + "print((((circle_data.num_max_bw_noaa / circle_data.num) > 0) & ((circle_data.num_max_bw_noaa / circle_data.num) < .50)).sum())\n", + "print((((circle_data.num_max_bw_noaa / circle_data.num) > 0) & ((circle_data.num_max_bw_noaa / circle_data.num) < .50)).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle max temp is in bounds for 50% to All matching references stations\")\n", + "print((((circle_data.num_max_bw_noaa / circle_data.num) < 1) & ((circle_data.num_max_bw_noaa / circle_data.num) >= .50)).sum())\n", + "print((((circle_data.num_max_bw_noaa / circle_data.num) < 1) & ((circle_data.num_max_bw_noaa / circle_data.num) >= .50)).sum() / circle_data.shape[0] * 100)\n", + "print(\"Cicles where the circle max temp is in bounds for ALL references stations: \")\n", + "print(((circle_data.num_max_bw_noaa / circle_data.num) == 1).sum())\n", + "print(((circle_data.num_max_bw_noaa / circle_data.num) == 1).sum() / circle_data.shape[0] * 100)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deep Dive on if Max Temperture Is In 
Bounds of Their Reference Stations" + ] + }, + { + "cell_type": "code", + "execution_count": 302, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uicircle_min_tempcircle_max_tempcircle_average_tempEcosys_stationNlcd_code_stationnumnum_notnanum_min_bw_noaanum_max_bw_noaanum_excellentnum_goodnum_fairnum_poorgoodness_modee_nume_num_notnae_num_min_bw_noaae_num_max_bw_noaae_num_excellente_num_goode_num_faire_num_poore_goodness_moden_numn_num_notnan_num_min_bw_noaan_num_max_bw_noaan_num_excellentn_num_goodn_num_fairn_num_poorn_goodness_modepe_pn_pgoodness_closestmax_temp_closestmax_temp_closest_valuemax_temp_closest_notnamin_temp_closestmin_temp_closest_valuemin_temp_closest_notnagoodness_lowestmax_temp_lowestmax_temp_lowest_valuemax_temp_lowest_notnamin_temp_lowestmin_temp_lowest_valuemin_temp_lowest_notnaconsensuse_consensusn_consensus
circle_idcount_date
87ymwsn1968-12-2622.0833-159.6667_196920.00000025.55555622.777778NaNNaN110.00.00001poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
87ysunb1971-12-2721.9667-159.4333_197223.33333325.55555624.444444NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1972-12-1721.9667-159.4333_197322.22222225.55555623.888889NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1973-12-1621.9667-159.4333_197417.22222227.77777822.500000NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
1974-12-1521.9667-159.4333_197522.22222227.77777825.000000NaNNaN13130.00.000013poor00NaNNaN0000NaN00NaNNaN0000NaN0.0NaNNaNpoorFalseNaN<NA>0.0NaN<NA>poorFalseNaNFalse0NaN0False<NA><NA>
\n", + "
" + ], + "text/plain": [ + " ui circle_min_temp circle_max_temp \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 22.0833-159.6667_1969 20.000000 25.555556 \n", + "87ysunb 1971-12-27 21.9667-159.4333_1972 23.333333 25.555556 \n", + " 1972-12-17 21.9667-159.4333_1973 22.222222 25.555556 \n", + " 1973-12-16 21.9667-159.4333_1974 17.222222 27.777778 \n", + " 1974-12-15 21.9667-159.4333_1975 22.222222 27.777778 \n", + "\n", + " circle_average_temp Ecosys_station Nlcd_code_station \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 22.777778 NaN NaN \n", + "87ysunb 1971-12-27 24.444444 NaN NaN \n", + " 1972-12-17 23.888889 NaN NaN \n", + " 1973-12-16 22.500000 NaN NaN \n", + " 1974-12-15 25.000000 NaN NaN \n", + "\n", + " num num_notna num_min_bw_noaa num_max_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 1 1 0.0 0.0 \n", + "87ysunb 1971-12-27 13 13 0.0 0.0 \n", + " 1972-12-17 13 13 0.0 0.0 \n", + " 1973-12-16 13 13 0.0 0.0 \n", + " 1974-12-15 13 13 0.0 0.0 \n", + "\n", + " num_excellent num_good num_fair num_poor \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 0 1 \n", + "87ysunb 1971-12-27 0 0 0 13 \n", + " 1972-12-17 0 0 0 13 \n", + " 1973-12-16 0 0 0 13 \n", + " 1974-12-15 0 0 0 13 \n", + "\n", + " goodness_mode e_num e_num_notna e_num_min_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 poor 0 0 NaN \n", + "87ysunb 1971-12-27 poor 0 0 NaN \n", + " 1972-12-17 poor 0 0 NaN \n", + " 1973-12-16 poor 0 0 NaN \n", + " 1974-12-15 poor 0 0 NaN \n", + "\n", + " e_num_max_bw_noaa e_num_excellent e_num_good \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0 0 \n", + "87ysunb 1971-12-27 NaN 0 0 \n", + " 1972-12-17 NaN 0 0 \n", + " 1973-12-16 NaN 0 0 \n", + " 1974-12-15 NaN 0 0 \n", + "\n", + " e_num_fair e_num_poor e_goodness_mode n_num \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 NaN 0 \n", + "87ysunb 1971-12-27 0 0 NaN 0 \n", + " 1972-12-17 0 0 NaN 0 \n", + " 1973-12-16 0 0 NaN 0 
\n", + " 1974-12-15 0 0 NaN 0 \n", + "\n", + " n_num_notna n_num_min_bw_noaa n_num_max_bw_noaa \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 NaN NaN \n", + "87ysunb 1971-12-27 0 NaN NaN \n", + " 1972-12-17 0 NaN NaN \n", + " 1973-12-16 0 NaN NaN \n", + " 1974-12-15 0 NaN NaN \n", + "\n", + " n_num_excellent n_num_good n_num_fair n_num_poor \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0 0 0 0 \n", + "87ysunb 1971-12-27 0 0 0 0 \n", + " 1972-12-17 0 0 0 0 \n", + " 1973-12-16 0 0 0 0 \n", + " 1974-12-15 0 0 0 0 \n", + "\n", + " n_goodness_mode p e_p n_p goodness_closest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0.0 NaN NaN poor \n", + "87ysunb 1971-12-27 NaN 0.0 NaN NaN poor \n", + " 1972-12-17 NaN 0.0 NaN NaN poor \n", + " 1973-12-16 NaN 0.0 NaN NaN poor \n", + " 1974-12-15 NaN 0.0 NaN NaN poor \n", + "\n", + " max_temp_closest max_temp_closest_value \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 False NaN \n", + "87ysunb 1971-12-27 False NaN \n", + " 1972-12-17 False NaN \n", + " 1973-12-16 False NaN \n", + " 1974-12-15 False NaN \n", + "\n", + " max_temp_closest_notna min_temp_closest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 0.0 \n", + "87ysunb 1971-12-27 0.0 \n", + " 1972-12-17 0.0 \n", + " 1973-12-16 0.0 \n", + " 1974-12-15 0.0 \n", + "\n", + " min_temp_closest_value min_temp_closest_notna \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN \n", + "87ysunb 1971-12-27 NaN \n", + " 1972-12-17 NaN \n", + " 1973-12-16 NaN \n", + " 1974-12-15 NaN \n", + "\n", + " goodness_lowest max_temp_lowest max_temp_lowest_value \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 poor False NaN \n", + "87ysunb 1971-12-27 poor False NaN \n", + " 1972-12-17 poor False NaN \n", + " 1973-12-16 poor False NaN \n", + " 1974-12-15 poor False NaN \n", + "\n", + " max_temp_lowest_notna min_temp_lowest \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 False 0 \n", + "87ysunb 1971-12-27 False 0 
\n", + " 1972-12-17 False 0 \n", + " 1973-12-16 False 0 \n", + " 1974-12-15 False 0 \n", + "\n", + " min_temp_lowest_value min_temp_lowest_notna consensus \\\n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 NaN 0 False \n", + "87ysunb 1971-12-27 NaN 0 False \n", + " 1972-12-17 NaN 0 False \n", + " 1973-12-16 NaN 0 False \n", + " 1974-12-15 NaN 0 False \n", + "\n", + " e_consensus n_consensus \n", + "circle_id count_date \n", + "87ymwsn 1968-12-26 \n", + "87ysunb 1971-12-27 \n", + " 1972-12-17 \n", + " 1973-12-16 \n", + " 1974-12-15 " + ] + }, + "execution_count": 302, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "circle_data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Removing rows without temperature data for either CBC Circles or NOAA stations." + "### Maximum Temperature In bounds of NOAA Stations" ] }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, + "execution_count": 303, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows before: 756378\n", - "Number of rows after: 61777\n", - "Total removed: 694601\n" + "Temp Data Consensus Amoung All Stations where the Circle Reported Max Temp was within bounds:\n", + "False 8235\n", + "NaN 4848\n", + "True 1367\n", + "Name: consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 56.989619\n", + "NaN 33.550173\n", + "True 9.460208\n", + "Name: consensus, dtype: float64\n", + "Temp Data Amoung Closest Stations where the Circle Reported Max Temp Between Bounds:\n", + "False 9908\n", + "True 4542\n", + "NaN 0\n", + "Name: max_temp_closest, dtype: Int64\n", + "^^ with Percentage:\n", + "False 68.567474\n", + "True 31.432526\n", + "NaN 0.000000\n", + "Name: max_temp_closest, dtype: float64\n", + "Snow Data Amoung Closest in Elivation Stations where the Circle Max Temp is within Bounds:\n", + "False 9845\n", + "True 4528\n", + "NaN 77\n", + "Name:
max_temp_lowest, dtype: Int64\n", + "^^ with Percentage:\n", + "False 68.131488\n", + "True 31.335640\n", + "NaN 0.532872\n", + "Name: max_temp_lowest, dtype: float64\n", + "Snow Data Consensus Amoung Stations with the same nlcd code where the Circle Max Temp is within Bounds:\n", + "NaN 7609\n", + "False 4775\n", + "True 2066\n", + "Name: n_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "NaN 52.657439\n", + "False 33.044983\n", + "True 14.297578\n", + "Name: n_consensus, dtype: float64\n", + "Snow Data Consensus Amoung Stations with the same ecosys classification where the Circle Max Temp is within Bounds:\n", + "NaN 8114\n", + "False 4316\n", + "True 2020\n", + "Name: e_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "NaN 56.152249\n", + "False 29.868512\n", + "True 13.979239\n", + "Name: e_consensus, dtype: float64\n" ] } ], "source": [ - "temp_df.dropna(axis=0, subset=['circle_min_temp', 'circle_max_temp', 'noaa_min_temp', 'noaa_max_temp'], inplace=True)\n", - "print(f\"Number of rows before: {row_count}\")\n", - "print(f\"Number of rows after: {temp_df.shape[0]}\")\n", - "print(f\"Total removed: {row_count - temp_df.shape[0]}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "-----" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Out of Bounds Data " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Temperature Data\n", - "There are a number of outliers in the data set that could highly skew analysis. 
Any rows with a temperature outside of a min or max recorded temperature in the United States will be dropped.\n", + "print(\"Temp Data Consensus Amoung All Stations where the Circle Reported Max Temp was within bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", "\n", - "To be conservative in data dropping we'll only using on max and one min for the entire country rather than by state or other locality. Additionally we'll check by each min/max temp for circles and stations to get an idea on if one is more error prone than another.\n", + "print(\"Temp Data Amoung Closest Stations where the Circle Reported Max Temp Between Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].max_temp_closest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].max_temp_closest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", "\n", - "Data: https://en.wikipedia.org/wiki/U.S._state_and_territory_temperature_extremes" + "print(\"Snow Data Amoung Closest in Elivation Stations where the Circle Max Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].max_temp_lowest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].max_temp_lowest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Consensus Amoung Stations with the same nlcd code where the Circle Max Temp is within Bounds:\")\n", + 
"print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].n_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].n_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Consensus Amoung Stations with the same ecosys classification where the Circle Max Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == True].e_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == True].e_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == True]) * 100))\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, + "execution_count": 304, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of CBC measurments outside max : 3\n", - "Number of NOAA measurments outside max : 14207\n", - "\n", - "Number of CBC measurments outside min : 0\n", - "Number of NOAA measurments outside min : 17098\n", - "\n", - "Number of NOAA stations with both outside : 125\n" + "Temp Data Consensus Amoung All Stations where the Circle did NOT Report Max Temp within Bounds:\n", + "False 40798\n", + "NaN 0\n", + "Name: consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 100.0\n", + "NaN 0.0\n", + "Name: consensus, dtype: float64\n", + "Temp Data Amoung Closest Stations where the Circle did NOT Report Max Temp within Bounds:\n", + "False 40798\n", + "NaN 0\n", + "Name: max_temp_closest, dtype: Int64\n", + "^^ with Percentage:\n", + "False 100.0\n", + "NaN 0.0\n", + "Name: max_temp_closest, dtype: float64\n", + "Temp Data Amoung Closest in Elivation Stations where the Circle did NOT Report Max Temp within Bounds:\n", + "False 40639\n", + "NaN 159\n", + "Name: 
max_temp_lowest, dtype: Int64\n", + "^^ with Percentage:\n", + "False 99.610275\n", + "NaN 0.389725\n", + "Name: max_temp_lowest, dtype: float64\n", + "Temp Data Consensus Amoung Stations with the same nlcd code where the Circle did NOT Report Max Temp within Bounds:\n", + "False 24082\n", + "NaN 16716\n", + "Name: n_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 59.027403\n", + "NaN 40.972597\n", + "Name: n_consensus, dtype: float64\n", + "Temp Data Consensus Amoung Stations with the same ecosys classification where the Circle did NOT Report Max Temp within Bounds:\n", + "NaN 21107\n", + "False 19691\n", + "Name: e_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "NaN 51.735379\n", + "False 48.264621\n", + "Name: e_consensus, dtype: float64\n" ] } ], "source": [ - "# Creating variables for each drop condition\n", - "circle_over_max_temp = temp_df.loc[temp_df[\"circle_max_temp\"]>max_temp_check]\n", - "circle_under_min_temp = temp_df.loc[temp_df[\"circle_min_temp\"]max_temp_check]\n", - "noaa_under_min_temp = temp_df.loc[temp_df[\"noaa_min_temp\"] max_temp_check) & (temp_df[\"noaa_min_temp\"] < min_temp_check)].shape[0]}')\n", + "print(\"Temp Data Amoung Closest in Elivation Stations where the Circle did NOT Report Max Temp within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == False].max_temp_lowest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == False].max_temp_lowest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == False]) * 100))\n", "\n", - "# Setting list of indices to drop\n", - "index_drop_list = list(circle_over_max_temp.index) + list(circle_under_min_temp.index) + list(noaa_over_max_temp.index) + list(noaa_under_min_temp.index)\n", + "print(\"Temp Data Consensus Amoung Stations with the same nlcd code where the Circle did NOT Report Max Temp within Bounds:\")\n", + 
"print(circle_data.loc[circle_data['num_max_bw_noaa'] == False].n_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == False].n_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == False]) * 100))\n", "\n", - "# Dropping All out of bout roundsRows\n", - "temp_df.drop(index_drop_list, inplace=True)" + "print(\"Temp Data Consensus Amoung Stations with the same ecosys classification where the Circle did NOT Report Max Temp within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_max_bw_noaa'] == False].e_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_max_bw_noaa'] == False].e_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_max_bw_noaa'] == False]) * 100))\n", + "\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Distance Data" + "### Minimum Temperature In bounds of NOAA Stations" ] }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, + "execution_count": 305, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows dropped outside of distance threshold: 23364\n" + "Temp Data Consensus Amoung All Stations where the Circle Reported Min Temp was within bounds:\n", + "False 11839\n", + "NaN 3019\n", + "True 858\n", + "Name: consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 75.330873\n", + "NaN 19.209723\n", + "True 5.459404\n", + "Name: consensus, dtype: float64\n", + "Temp Data Amoung Closest Stations where the Circle Reported Min Temp Between Bounds:\n", + "0.0 10748\n", + "1.0 4968\n", + "Name: min_temp_closest, dtype: int64\n", + "^^ with Percentage:\n", + "0.0 68.388903\n", + "1.0 31.611097\n", + "Name: min_temp_closest, dtype: float64\n", + "Snow Data Amoung Closest in Elivation Stations where the
Circle Min Temp is within Bounds:\n", + "0.0 10659\n", + "1.0 4969\n", + "NaN 88\n", + "Name: min_temp_lowest, dtype: int64\n", + "^^ with Percentage:\n", + "0.0 67.822601\n", + "1.0 31.617460\n", + "NaN 0.559939\n", + "Name: min_temp_lowest, dtype: float64\n", + "Snow Data Consensus Amoung Stations with the same nlcd code where the Circle Min Temp is within Bounds:\n", + "False 7547\n", + "NaN 6872\n", + "True 1297\n", + "Name: n_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "False 48.021125\n", + "NaN 43.726139\n", + "True 8.252736\n", + "Name: n_consensus, dtype: float64\n", + "Snow Data Consensus Amoung Stations with the same ecosys classification where the Circle Min Temp is within Bounds:\n", + "NaN 7877\n", + "False 6587\n", + "True 1252\n", + "Name: e_consensus, dtype: Int64\n", + "^^ with Percentage:\n", + "NaN 50.120896\n", + "False 41.912700\n", + "True 7.966404\n", + "Name: e_consensus, dtype: float64\n" ] } ], "source": [ - "# Dropping rows with distance differences larger then set threshold\n", - "temp_df.drop(temp_df[temp_df['distance_diff'] > distance_threshold].index, inplace=True)\n", - "print(f'Number of rows dropped outside of distance threshold: {temp_df.shape[0]}')" + "print(\"Temp Data Consensus Amoung All Stations where the Circle Reported Min Temp was within bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == True].consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Temp Data Amoung Closest Stations where the Circle Reported Min Temp Between Bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].min_temp_closest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == 
True].min_temp_closest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Amoung Closest in Elivation Stations where the Circle Min Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].min_temp_lowest.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == True].min_temp_lowest.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Consensus Amoung Stations with the same nlcd code where the Circle Min Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].n_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == True].n_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n", + "print(\"Snow Data Consensus Amoung Stations with the same ecosys classification where the Circle Min Temp is within Bounds:\")\n", + "print(circle_data.loc[circle_data['num_min_bw_noaa'] == True].e_consensus.value_counts(dropna = False) )\n", + "print(\"^^ with Percentage:\")\n", + "print((circle_data.loc[circle_data['num_min_bw_noaa'] == True].e_consensus.value_counts(dropna = False) / len(circle_data.loc[circle_data['num_min_bw_noaa'] == True]) * 100))\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Elevation Data" + "## Population Comparison \n", + "Compare the populations of the Max Temps reported by the CBC circles and the Max Temps recorded by the NOAA stations " ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 313, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows dropped outside of elevation threshold: 16986\n" +
"The number of cases for CBC Max Temps is: 68390\n", + "The number of cases for NOAA Max Temps is: 68390\n" ] } ], "source": [ - "# Dropping rows with circles and stations that are over the elevation threshold\n", - "temp_df.drop(temp_df[temp_df['elevation_diff'] > elevation_threshold].index, inplace=True)\n", + "# Set up the two populations\n", + "cbc_max_temps = circle_data['circle_max_temp']\n", + "noaa_max_temps = circle_data['max_temp_closest_value']\n", "\n", - "# Dropping rows with no elevation data\n", - "temp_df.dropna(subset=['circle_elevation', 'noaa_elevation'], inplace=True)\n", - "print(f'Number of rows dropped outside of elevation threshold: {temp_df.shape[0]}')" + "print(\"The number of cases for CBC Max Temps is: \" + str(cbc_max_temps.shape[0]))\n", + "print(\"The number of cases for NOAA Max Temps is: \" + str(noaa_max_temps.shape[0]))" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 314, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of cases for CBC Max Temps is: 57471\n", + "The number of cases for NOAA Max Temps is: 18466\n" + ] + } + ], "source": [ - "#### Checking to see how many CBC Circle temperatures records are within the bounds of the NOAA Station records" + "# drop the Nan Values \n", + "cbc_max_temps = cbc_max_temps.dropna()\n", + "noaa_max_temps = noaa_max_temps.dropna()\n", + "\n", + "print(\"The number of cases for CBC Max Temps is: \" + str(cbc_max_temps.shape[0]))\n", + "print(\"The number of cases for NOAA Max Temps is: \" + str(noaa_max_temps.shape[0]))" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 315, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 57471.000000\n", + "mean 5.589304\n", + "std 9.684150\n", + "min -45.000000\n", + "25% -0.555556\n", + "50% 4.444444\n", + "75% 12.222222\n", + "max 56.000000\n", + "Name: circle_max_temp, dtype: float64" + ] + }, + 
"execution_count": 315, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "temp_df['temp_check'] = temp_df['circle_average_temp'].between(temp_df['noaa_min_temp'], temp_df['noaa_max_temp'])" + "cbc_max_temps.describe()" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 316, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of CBC Cirlcs who's temperature is in the bounds of the corresponding NOAA station: 10544\n", - "Number of CBC Cirlcs who's temperature is not in the bounds of the corresponding NOAA station: 6442\n", - "\n", - "62% of stations lay between\n" - ] + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 316, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD4CAYAAADo30HgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAUtklEQVR4nO3df4xdZ33n8fenSRNSWnAg3VlkR2tLWFQBFzU7CqkiVSNSJU6CcP6gKCgCh3prVRtYumsJHPgjEhApqJumoBZWFklrqoiQTaliNSnBGzKqkDYmBCghCWlGwRBbCaE4CTXZwk72u3/cx+19jJ2x7x3PtWfeL2k05zznOfc8X58Zf+b8uOemqpAk6ZBfmvQAJEknF4NBktQxGCRJHYNBktQxGCRJndMnPYBRnXPOObV27dol3+5Pf/pTXvnKVy75didlpdULK69m613eDq/3oYce+qeq+vWXW+eUDYa1a9fy9a9/fcm3Ozs7y8zMzJJvd1JWWr2w8mq23uXt8HqTfH+hdTyVJEnqGAySpI7BIEnqGAySpI7BIEnqGAySpI7BIEnqGAySpI7BIEnqnLLvfJZOpLXb7x553b03XrGII5GWnkcMkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqTOgsGQ5NYkzyb5zlDbHyf5bpJvJ/mbJKuGll2XZC7J40kuHWrf2Nrmkmwfal+XZE9r/0KSMxazQEnS8TmWI4a/BDYe1rYbeFNV/Sbwj8B1AEnOA64C3tjW+XSS05KcBvw5cBlwHvCu1hfgE8DNVfV64Dlgy1gVSZLGsmAwVNXfAwcOa/tyVc232QeANW16E3B7Vf2sqr4HzAEXtK+5qnqyqn4O3A5sShLgrcCdbf2dwJVj1iRJGsNifFDP7wNfaNOrGQTFIftaG8BTh7W/BXgt8PxQyAz3/wVJtgJbAaamppidnR137Mft4MGDE9nupKy0emFQ87YNL428/qn277XS9rH1LmysYEjyEWAeuG2c1zlWVbUD2AEwPT1dMzMzS7HZzuzsLJPY7qSstHphUPNNX/3pyOvvvXpm8QazBFbaPrbehY0cDEmuAd4GXFxV1Zr3A+cOdVvT2jhK+4+BVU
lOb0cNw/0lSRMw0u2qSTYCHwTeXlUvDi3aBVyV5Mwk64D1wNeAB4H17Q6kMxhcoN7VAuV+4B1t/c3AXaOVIklaDMdyu+rngf8NvCHJviRbgD8Dfg3YneRbSf4HQFU9AtwBPAp8Cbi2ql5qRwPvA+4FHgPuaH0BPgT8tyRzDK453LKoFUqSjsuCp5Kq6l1HaD7qf95VdQNwwxHa7wHuOUL7kwzuWpIknQR857MkqWMwSJI6BoMkqWMwSJI6BoMkqWMwSJI6BoMkqWMwSJI6BoMkqWMwSJI6BoMkqbMYH9Qjacja7XePvO7eG69YxJFIo/GIQZLUMRgkSR2DQZLUMRgkSR2DQZLUMRgkSR2DQZLUMRgkSR2DQZLUMRgkSZ0FgyHJrUmeTfKdobbXJNmd5In2/ezWniSfSjKX5NtJzh9aZ3Pr/0SSzUPt/zHJw22dTyXJYhcpSTp2x3LE8JfAxsPatgP3VdV64L42D3AZsL59bQU+A4MgAa4H3gJcAFx/KExanz8YWu/wbUmSltCCwVBVfw8cOKx5E7CzTe8Erhxq/1wNPACsSvI64FJgd1UdqKrngN3AxrbsVVX1QFUV8Lmh15IkTcCoT1edqqqn2/QzwFSbXg08NdRvX2t7ufZ9R2g/oiRbGRyJMDU1xezs7IjDH93Bgwcnst1JWWn1wqDmbRtemsi2/Zk+8ax3YWM/druqKkmN+zrHuK0dwA6A6enpmpmZWYrNdmZnZ5nEdidlpdULg5pv+upPJ7LtvVfPLPk2V9o+tt6FjXpX0g/baSDa92db+37g3KF+a1rby7WvOUK7JGlCRg2GXcChO4s2A3cNtb+n3Z10IfBCO+V0L3BJkrPbRedLgHvbsp8kubDdjfSeodeSJE3AgqeSknwemAHOSbKPwd1FNwJ3JNkCfB94Z+t+D3A5MAe8CLwXoKoOJPkY8GDr99GqOnRB+z8zuPPpLODv2pckaUIWDIaqetdRFl18hL4FXHuU17kVuPUI7V8H3rTQOCRJS8N3PkuSOgaDJKljMEiSOmO/j0E6Wa3dfvdI623bMI+/GlrJPGKQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx2CQJHUMBklSx88vlE4io34c6SF7b7xikUailWysI4Yk/zXJI0m+k+TzSV6RZF2SPUnmknwhyRmt75ltfq4tXzv0Ote19seTXDpeSZKkcYwcDElWA/8FmK6qNwGnAVcBnwBurqrXA88BW9oqW4DnWvvNrR9JzmvrvRHYCHw6yWmjjkuSNJ5xrzGcDpyV5HTgV4CngbcCd7blO4Er2/SmNk9bfnGStPbbq+pnVfU9YA64YMxxSZJGNPI1hqran+S/Az8A/g/wZeAh4Pmqmm/d9gGr2/Rq4Km27nySF4DXtvYHhl56eJ1Okq3AVoCpqSlmZ2dHHf7IDh48OJHtTsqpXO+2DfMLdzqCqbNGX3fSRtlXp/I+HoX1LmzkYEhyNoO/9tcBzwP/k8GpoBOmqnYAOwCmp6drZmbmRG7uiGZnZ5nEdiflVK73mhEv5G7bMM9ND5+a92XsvXrmuNc5lffxKKx3YeOcSvpd4HtV9aOq+r/AF4GLgFXt1BLAGmB/m94PnAvQlr8a+PFw+xHWkSQtsXGC4QfAhUl+pV0ruBh4FLgfeEfrsxm4q03vavO05V+pqmrtV7W7ltYB64GvjTEuSdIYxrnGsCfJncA3gHngmwxO89wN3J7k463tlrbKLcBfJZkDDjC4E4mqeiTJHQxCZR64tqpeGnVckqTxjHUitaquB64/rPlJjnBXUVX9C/B7R3mdG4AbxhmLJGlx+EgMSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQwGSVLHYJAkdQ
wGSVLHYJAkdQwGSVJnrGBIsirJnUm+m+SxJL+d5DVJdid5on0/u/VNkk8lmUvy7STnD73O5tb/iSSbxy1KkjS6cY8YPgl8qap+A3gz8BiwHbivqtYD97V5gMuA9e1rK/AZgCSvAa4H3gJcAFx/KEwkSUtv5GBI8mrgd4BbAKrq51X1PLAJ2Nm67QSubNObgM/VwAPAqiSvAy4FdlfVgap6DtgNbBx1XJKk8Zw+xrrrgB8Bf5HkzcBDwAeAqap6uvV5Bphq06uBp4bW39fajtb+C5JsZXC0wdTUFLOzs2MMfzQHDx6cyHYn5VSud9uG+ZHWmzpr9HUnbZR9dSrv41FY78LGCYbTgfOB91fVniSf5N9OGwFQVZWkxthGp6p2ADsApqena2ZmZrFe+pjNzs4yie1Oyqlc7zXb7x5pvW0b5rnp4XF+NSZn79Uzx73OqbyPR2G9CxvnGsM+YF9V7WnzdzIIih+2U0S078+25fuBc4fWX9PajtYuSZqAkYOhqp4BnkryhtZ0MfAosAs4dGfRZuCuNr0LeE+7O+lC4IV2yule4JIkZ7eLzpe0NknSBIx7vPx+4LYkZwBPAu9lEDZ3JNkCfB94Z+t7D3A5MAe82PpSVQeSfAx4sPX7aFUdGHNckqQRjRUMVfUtYPoIiy4+Qt8Crj3K69wK3DrOWCRJi8N3PkuSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOmN95rN0Iq3dfvekhyCtSB4xSJI6HjFIy8goR1nbNsxzzfa72XvjFSdgRDoVjX3EkOS0JN9M8rdtfl2SPUnmknwhyRmt/cw2P9eWrx16jeta++NJLh13TJKk0S3GqaQPAI8NzX8CuLmqXg88B2xp7VuA51r7za0fSc4DrgLeCGwEPp3ktEUYlyRpBGMFQ5I1wBXAZ9t8gLcCd7YuO4Er2/SmNk9bfnHrvwm4vap+VlXfA+aAC8YZlyRpdONeY/hT4IPAr7X51wLPV9V8m98HrG7Tq4GnAKpqPskLrf9q4IGh1xxep5NkK7AVYGpqitnZ2TGHf/wOHjw4ke1OyiTr3bZhfuFOJ8DUWZPb9iQcqnel/Fz7O7ywkYMhyduAZ6vqoSQzo77O8aiqHcAOgOnp6ZqZWZLNdmZnZ5nEdidlkvVeM6HbVbdtmOemh1fOfRmH6t179cykh7Ik/B1e2Dg//RcBb09yOfAK4FXAJ4FVSU5vRw1rgP2t/37gXGBfktOBVwM/Hmo/ZHgdSdISG/kaQ1VdV1Vrqmotg4vHX6mqq4H7gXe0bpuBu9r0rjZPW/6VqqrWflW7a2kdsB742qjjkiSN50QcL38IuD3Jx4FvAre09luAv0oyBxxgECZU1SNJ7gAeBeaBa6vqpRMwLknSMViUYKiqWWC2TT/JEe4qqqp/AX7vKOvfANywGGORJI3HR2JIkjoGgySpYzBIkjoGgySpYzBIkjoGgySpYzBIkjoGgySps3KeFCbpZY3zGdt++tvy4hGDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOiMHQ5Jzk9yf5NEkjyT5QGt/TZLdSZ5o389u7UnyqSRzSb6d5Pyh19rc+j+RZPP4ZUmSRjXOEcM8sK2qzgMuBK5Nch6wHbivqtYD97V5gMuA9e1rK/AZGAQJcD3wFuAC4PpDYSJJWnojB0NVPV1V32jT/ww8BqwGNgE7W7edwJVtehPwuRp4AFiV5HXApcDuqjpQVc8Bu4GNo45LkjSeRfkEtyRrgd8C9gBTVfV0W/QMMNWmVwNPDa22r7Udrf1I29nK4GiDqakpZmdnF2P4x+XgwYMT2e6kTLLebRvmJ7LdqbMmt+1JWIx6T6XfCX+HFzZ2MCT5VeCvgT+qqp8k+ddlVV
VJatxtDL3eDmAHwPT0dM3MzCzWSx+z2dlZJrHdSZlkvdeM8VGT49i2YZ6bHl45n3q7GPXuvXpmcQazBPwdXthYdyUl+WUGoXBbVX2xNf+wnSKifX+2te8Hzh1afU1rO1q7JGkCxrkrKcAtwGNV9SdDi3YBh+4s2gzcNdT+nnZ30oXAC+2U073AJUnObhedL2ltkqQJGOf48SLg3cDDSb7V2j4M3AjckWQL8H3gnW3ZPcDlwBzwIvBegKo6kORjwIOt30er6sAY45IkjWHkYKiqrwI5yuKLj9C/gGuP8lq3AreOOhadvNZO6DqBpNH5zmdJUsdgkCR1DAZJUsdgkCR1DAZJUsdgkCR1DAZJUsdgkCR1Vs6TwiSdMOO8kXHvjVcs4ki0GDxikCR1DAZJUsdgkCR1DAZJUsdgkCR1vCtJC/LR2dLK4hGDJKljMEiSOgaDJKljMEiSOgaDJKljMEiSOgaDJKnj+xhWiFHfi7Btwzz+mEgry0lzxJBkY5LHk8wl2T7p8UjSSnVSBEOS04A/By4DzgPeleS8yY5Kklamk+UcwQXAXFU9CZDkdmAT8OhER7XIfLSE9Iv8kJ+TT6pq0mMgyTuAjVX1n9r8u4G3VNX7Duu3FdjaZt8APL6kAx04B/inCWx3UlZavbDyarbe5e3wev9DVf36y61wshwxHJOq2gHsmOQYkny9qqYnOYaltNLqhZVXs/Uub6PUe1JcYwD2A+cOza9pbZKkJXayBMODwPok65KcAVwF7JrwmCRpRTopTiVV1XyS9wH3AqcBt1bVIxMe1tFM9FTWBKy0emHl1Wy9y9tx13tSXHyWJJ08TpZTSZKkk4TBIEnqGAzHKcm2JJXknDafJJ9qj/L4dpLzJz3GxZDkj5N8t9X0N0lWDS27rtX7eJJLJznOxbTcH8uS5Nwk9yd5NMkjST7Q2l+TZHeSJ9r3syc91sWU5LQk30zyt21+XZI9bT9/od3wsmwkWZXkzvb7+1iS3z7efWwwHIck5wKXAD8Yar4MWN++tgKfmcDQToTdwJuq6jeBfwSuA2iPKrkKeCOwEfh0e6TJKW2FPJZlHthWVecBFwLXthq3A/dV1Xrgvja/nHwAeGxo/hPAzVX1euA5YMtERnXifBL4UlX9BvBmBrUf1z42GI7PzcAHgeEr9puAz9XAA8CqJK+byOgWUVV9uarm2+wDDN5bAoN6b6+qn1XV94A5Bo80OdX962NZqurnwKHHsiwbVfV0VX2jTf8zg/8wVjOoc2frthO4cjIjXHxJ1gBXAJ9t8wHeCtzZuiy3el8N/A5wC0BV/byqnuc497HBcIySbAL2V9U/HLZoNfDU0Py+1rac/D7wd216uda7XOs6oiRrgd8C9gBTVfV0W/QMMDWhYZ0If8rgj7n/1+ZfCzw/9EfPctvP64AfAX/RTp99NskrOc59fFK8j+FkkeR/Af/+CIs+AnyYwWmkZePl6q2qu1qfjzA4BXHbUo5NJ06SXwX+GvijqvrJ4I/ogaqqJMviHvYkbwOeraqHksxMejxL5HTgfOD9VbUnySc57LTRsexjg2FIVf3ukdqTbGCQxP/QfonWAN9IcgGn8OM8jlbvIUmuAd4GXFz/9oaXU7beBSzXujpJfplBKNxWVV9szT9M8rqqerqdBn12ciNcVBcBb09yOfAK4FUMzr+vSnJ6O2pYbvt5H7Cvqva0+TsZBMNx7WNPJR2Dqnq4qv5dVa2tqrUM/vHPr6pnGDy64z3t7qQLgReGDtlOWUk2MjgEf3tVvTi0aBdwVZIzk6xjcNH9a5MY4yJb9o9laefXbwEeq6o/GVq0C9jcpjcDdy312E6Eqrquqta039mrgK9U1dXA/cA7WrdlUy9A+z/pqSRvaE0XM/j4guPaxx4xjO8e4HIGF2FfBN472eEsmj8DzgR2t6OkB6rqD6vqkSR3MPhhmweuraqXJjjORXGKPZZlVBcB7wYeTvKt1vZh4EbgjiRbgO8D75zQ+JbKh4
Dbk3wc+CbtQu0y8n7gtvYHzpMM/k/6JY5jH/tIDElSx1NJkqSOwSBJ6hgMkqSOwSBJ6hgMkqSOwSBJ6hgMkqTO/wfM5LZby84grAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" } ], "source": [ - "# Counting number of circles that are true\n", - "temp_true = sum(temp_df['temp_check'])\n", - "temp_false = temp_df.shape[0] - sum(temp_df['temp_check'])\n", - "print(f\"Number of CBC Cirlcs who's temperature is in the bounds of the corresponding NOAA station: {sum(temp_df['temp_check'])}\")\n", - "print(f\"Number of CBC Cirlcs who's temperature is not in the bounds of the corresponding NOAA station: {temp_df.shape[0] - sum(temp_df['temp_check'])}\")\n", - "print()\n", - "print(f\"{round((temp_true/temp_df.shape[0])*100)}% of stations lay between\")" + "cbc_max_temps.hist(bins =20)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 317, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 18466.000000\n", + "mean 5.931729\n", + "std 8.994429\n", + "min -35.600000\n", + "25% 0.000000\n", + "50% 5.000000\n", + "75% 11.700000\n", + "max 33.900000\n", + "Name: max_temp_closest_value, dtype: float64" + ] + }, + "execution_count": 317, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "---\n", - "## Temperature Measurement Goodness\n", - "\n", - "temp_metric = sqrt( (noaa_min_temp - circle_min_temp)^2 + (noaa_max_temp - circle_max_temp)^2 )" + "noaa_max_temps.describe()" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 318, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 318, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAWxElEQVR4nO3dfWxd9X3H8feHECDCHQmDXaUhmlM1W0fJGsBKqDpNNqwQwrRQqa2CEA2Uyt1EtlaKNkJRC4UypWspEiqlcpWMsLK6GQXhQRhNUzzEH4GQNsR5KMOFMGKFRG1CigvLZvbdH/dndjF+uL6+T87v85KOfM73PNzvub7++Nxzz71XEYGZmeXhpEY3YGZm9ePQNzPLiEPfzCwjDn0zs4w49M3MMnJyoxsYz1lnnRWtra2NbmNMv/3tbzn99NMb3UbZ3G9tTad+p1Ov4H4na8eOHb+KiLNHm9fUod/a2spzzz3X6DbG1NvbS3t7e6PbKJv7ra3p1O906hXc72RJemWseT69Y2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZWTC0Jd0mqRnJT0vaY+kr6b6fZJelrQzDYtTXZLultQvaZekC0q2tUrSi2lYVbvdMjOz0ZTzjtzjwMURMShpJvC0pMfTvL+NiAdHLH85sDANS4F7gaWSzgRuAdqAAHZI6omIo9XYEbPppHXtYxWvu3/dFVXsxHIz4ZF+FA2myZlpGO/rtlYA96f1tgGzJc0FLgO2RMSRFPRbgGVTa9/MzCZD5XxdoqQZwA7gg8A9EXGjpPuAj1J8JrAVWBsRxyU9CqyLiKfTuluBG4F24LSI+Fqqfxl4KyK+OeK2OoFOgEKhcGF3d3c19rMmBgcHaWlpaXQbZXO/tTWZfvsGjlV8O4vmnVHxusNO5Pu2GTS6346Ojh0R0TbavLI+cC0i3gYWS5oNPCzpPOAm4DXgFKCLYrDfNtVmI6IrbY+2trZo5g9ZavSHKk2W+62tyfR77VRO71xd3m2M50S+b5tBM/c7qat3IuJ14ElgWUQcTKdwjgP/CCxJiw0A80tWOyfVxqqbmVmdlHP1ztnpCB9Js4CPA79I5+mRJOBKYHdapQf4TLqK5yLgWEQcBJ4ALpU0R9Ic4NJUMzOzOinn9M5cYGM6r38SsCkiHpX0U0lnAwJ2An+Zlt8MLAf6gTeB6wAi4oik24HtabnbIuJI9XbFzMwmMmHoR8Qu4PxR6hePsXwAN4wxbwOwYZI9mplZlfgduWZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYmDH1Jp0l6VtLzkvZI+mqqL5D0jKR+ST+UdEqqn5qm+9P81pJt3ZTqL0i6rFY7ZWZmoyvnSP84cHFEfARYDCyTdBHwdeCuiPggcBS4Pi1/PXA01e9KyyHpXGAl8GFgGfAdSTOquTNmZja+CUM/igbT5Mw0BHAx8GCqbwSuTOMr0jRp/iWSlOrdEXE8Il4G+oElVdkLMzMriyJi4oWKR+Q7gA8C9wDfALalo3kkzQcej4jzJO0GlkXEgTTvl8BS4Na0zvdTfX1a58ERt9UJdAIUCoULu7u7q7GfNTE4OEhLS0uj2yib+62tyfTbN3Cs4ttZNO+MitcddiLft82g0f12dHTsiIi20eadXM4GIuJtYLGk2cDDwIeq2N/I2+oCugDa2tqivb29Vjc1Zb29vTRzfyO539qaTL/Xrn2s4tvZf3V5tzGeE/m+bQbN3O+krt6JiNeBJ4GPArMlDf/TOAcYSOMDwHyANP8M4Nel9VHWMTOzOijn6p2z0xE+kmYBHwf2UQz/T6bFVgGPpPGeNE2a/9MonkPqAVamq3sWAAuBZ6u1I2ZmNrFyTu/
MBTam8/onAZsi4lFJe4FuSV8Dfg6sT8uvB/5JUj9whOIVO0TEHkmbgL3AEHBDOm1kZmZ1MmHoR8Qu4PxR6i8xytU3EfFfwKfG2NYdwB2Tb9PMzKrB78g1M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8tIWe/INbP3ah3xrto1i4am9E5bs3rwkb6ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGZkw9CXNl/SkpL2S9kj6QqrfKmlA0s40LC9Z5yZJ/ZJekHRZSX1ZqvVLWlubXTIzs7GU83n6Q8CaiPiZpPcBOyRtSfPuiohvli4s6VxgJfBh4P3ATyT9QZp9D/Bx4ACwXVJPROytxo6YmdnEJgz9iDgIHEzjb0jaB8wbZ5UVQHdEHAdeltQPLEnz+iPiJQBJ3WlZh76ZWZ1M6py+pFbgfOCZVFotaZekDZLmpNo84NWS1Q6k2lh1MzOrE0VEeQtKLcC/A3dExEOSCsCvgABuB+ZGxGclfRvYFhHfT+utBx5Pm1kWEZ9L9WuApRGxesTtdAKdAIVC4cLu7u6p7mPNDA4O0tLS0ug2yuZ+q6tv4Ni7pguz4NBbtb/dRfPOmPI2mv2+Hcn9Tk5HR8eOiGgbbV5Z35EraSbwI+CBiHgIICIOlcz/HvBomhwA5pesfk6qMU79HRHRBXQBtLW1RXt7ezktNkRvby/N3N9I7re6Rn4f7ppFQ9zZV/uvnd5/dfuUt9Hs9+1I7rd6yrl6R8B6YF9EfKukPrdksU8Au9N4D7BS0qmSFgALgWeB7cBCSQsknULxxd6e6uyGmZmVo5zDko8B1wB9knam2peAqyQtpnh6Zz/weYCI2CNpE8UXaIeAGyLibQBJq4EngBnAhojYU8V9MTOzCZRz9c7TgEaZtXmcde4A7hilvnm89czMrLb8jlwzs4w49M3MMlL7Sw3MrKpaR1w1NFn7111RpU5sOvKRvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZmTD0Jc2X9KSkvZL2SPpCqp8paYukF9PPOakuSXdL6pe0S9IFJdtalZZ/UdKq2u2WmZmNppwj/SFgTUScC1wE3CDpXGAtsDUiFgJb0zTA5cDCNHQC90LxnwRwC7AUWALcMvyPwszM6mPC0I+IgxHxszT+BrAPmAesADamxTYCV6bxFcD9UbQNmC1pLnAZsCUijkTEUWALsKyqe2NmZuNSRJS/sNQKPAWcB/xnRMxOdQFHI2K2pEeBdRHxdJq3FbgRaAdOi4ivpfqXgbci4psjbqOT4jMECoXChd3d3VPZv5oaHBykpaWl0W2Uzf1WV9/AsXdNF2bBobca1MwkLJp3RtPftyO538np6OjYERFto807udyNSGoBfgR8MSJ+U8z5oogISeX/9xhHRHQBXQBtbW3R3t5ejc3WRG9vL83c30jut7quXfvYu6bXLBrizr6y/6QaZv/V7U1/347kfqunrKt3JM2kGPgPRMRDqXwonbYh/Tyc6gPA/JLVz0m1sepmZlYnEx6WpFM364F9EfGtklk9wCpgXfr5SEl9taRuii/aHouIg5KeAP6+5MXbS4GbqrMbZlau1rWPsWbR0HueqZRj/7oratCR1VM5z0U/BlwD9EnamWpfohj2myRdD7wCfDrN2wwsB/qBN4HrACLiiKTbge1pudsi4khV9sLMzMoyYeinF2Q1xuxLRlk+gBvG2NYGYMNkGjQzs+rxO3LNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsI83/QSFmNdRawbtSzaYzH+mbmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9
mlhGHvplZRhz6ZmYZmTD0JW2QdFjS7pLarZIGJO1Mw/KSeTdJ6pf0gqTLSurLUq1f0trq74qZmU2knCP9+4Blo9TviojFadgMIOlcYCXw4bTOdyTNkDQDuAe4HDgXuCota2ZmdTThRytHxFOSWsvc3gqgOyKOAy9L6geWpHn9EfESgKTutOzeSXdsZmYVU0RMvFAx9B+NiPPS9K3AtcBvgOeANRFxVNK3gW0R8f203Hrg8bSZZRHxuVS/BlgaEatHua1OoBOgUChc2N3dPYXdq63BwUFaWloa3UbZ3O979Q0cq9q2CrPg0FtV21xNNaLXRfPOqHhdP3Ynp6OjY0dEtI02r9IvUbkXuB2I9PNO4LMVbutdIqIL6AJoa2uL9vb2amy2Jnp7e2nm/kZyv+91bRW/RGXNoiHu7Jse30vUiF73X91e8bp+7FZPRb/1iDg0PC7pe8CjaXIAmF+y6Dmpxjh1MzOrk4ou2ZQ0t2TyE8DwlT09wEpJp0paACwEngW2AwslLZB0CsUXe3sqb9vMzCox4ZG+pB8A7cBZkg4AtwDtkhZTPL2zH/g8QETskbSJ4gu0Q8ANEfF22s5q4AlgBrAhIvZUfW/MzGxc5Vy9c9Uo5fXjLH8HcMco9c3A5kl1Z2ZmVeV35JqZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZaSiL0Y3M5us1rWPVbzufctOr2IneXPo27Q2lSAxy5FP75iZZWTC0Je0QdJhSbtLamdK2iLpxfRzTqpL0t2S+iXtknRByTqr0vIvSlpVm90xM7PxlHOkfx+wbERtLbA1IhYCW9M0wOXAwjR0AvdC8Z8EcAuwFFgC3DL8j8LMzOpnwtCPiKeAIyPKK4CNaXwjcGVJ/f4o2gbMljQXuAzYEhFHIuIosIX3/iMxM7Maq/SF3EJEHEzjrwGFND4PeLVkuQOpNlb9PSR1UnyWQKFQoLe3t8IWa29wcLCp+xvpROx3zaKh+jRThsKs5upnPNOpVzgxH7uNMuWrdyIiJEU1mknb6wK6ANra2qK9vb1am6663t5emrm/kU7Efq9toqt31iwa4s6+6XFB3HTqFYqXbJ5oj91GqfTqnUPptA3p5+FUHwDmlyx3TqqNVTczszqqNPR7gOErcFYBj5TUP5Ou4rkIOJZOAz0BXCppTnoB99JUMzOzOprw+Z2kHwDtwFmSDlC8CmcdsEnS9cArwKfT4puB5UA/8CZwHUBEHJF0O7A9LXdbRIx8cdjMzGpswtCPiKvGmHXJKMsGcMMY29kAbJhUd2ZmVlV+R66ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGZk+78M2s2z1DRyr+CM39q+7osrdTG8+0jczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OMOPTNzDLi0Dczy8iUQl/Sfkl9knZKei7VzpS0RdKL6eecVJekuyX1S9ol6YJq7ICZmZWvGkf6HRGxOCLa0vRaYGtELAS2pmmAy4GFaegE7q3CbZuZ2STU4vTOCmBjGt8IXFlSvz+KtgGzJc2twe2bmdkYphr6AfxY0g5JnalWiIiDafw1oJDG5wGvlqx7INXMzKxOFBGVryzNi4gBSb8HbAH+GuiJiNklyxyNiDmSHgXWRcTTqb4VuDEinhuxzU6Kp38oFAoXdnd3V9xfrQ0ODtLS0tLoNsp2IvbbN3CsTt1MrDALDr3V6C7KM516han1u2jeGdVtpgyN/lvr6OjYUXLK/V2m9HWJETGQfh6W9DCwBDgkaW5EHEynbw6nxQeA+SWrn5NqI7fZBXQBtLW1RXt7+1RarKne3l6aub+RTsR+K/0KvVpYs2iIO/umxzeQTqdeYWr97r+6vbrNlKGZ/9Yq/q1LOh0
4KSLeSOOXArcBPcAqYF36+UhapQdYLakbWAocKzkNZBlrHSO41ywaaqpQNzsRTOVffQF4WNLwdv45Iv5N0nZgk6TrgVeAT6flNwPLgX7gTeC6Kdy2mVlZxjqoKNeJ9sXqFYd+RLwEfGSU+q+BS0apB3BDpbdnZmZT53fkmpllxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpaR6fMxe9bUpvr5JmZWHz7SNzPLiEPfzCwjDn0zs4w49M3MMuLQNzPLiK/esXf4ChyzE59D38xsHJUcDA1/v3MzftWiT++YmWXEoW9mlhGf3jnBjPdUdPgpp5nlq+5H+pKWSXpBUr+ktfW+fTOznNX1SF/SDOAe4OPAAWC7pJ6I2FvPPpqZr6Axs1qq9+mdJUB/RLwEIKkbWAGcUKHv4DYzmFoW1OrKH0VETTY86o1JnwSWRcTn0vQ1wNKIWF2yTCfQmSb/EHihbg1O3lnArxrdxCS439qaTv1Op17B/U7W70fE2aPNaLoXciOiC+hqdB/lkPRcRLQ1uo9yud/amk79Tqdewf1WU71fyB0A5pdMn5NqZmZWB/UO/e3AQkkLJJ0CrAR66tyDmVm26np6JyKGJK0GngBmABsiYk89e6iyaXEaqoT7ra3p1O906hXcb9XU9YVcMzNrLH8Mg5lZRhz6ZmYZcehXQNLtknZJ2inpx5Len+qSdHf6iIldki5odK8Akr4h6Repp4clzS6Zd1Pq9wVJlzWyz9TPpyTtkfS/ktpGzGuqXoc1+0eLSNog6bCk3SW1MyVtkfRi+jmnkT2WkjRf0pOS9qbHwhdSvel6lnSapGclPZ96/WqqL5D0THpM/DBduNIcIsLDJAfgd0rG/wb4bhpfDjwOCLgIeKbRvaa+LgVOTuNfB76exs8FngdOBRYAvwRmNLjXP6L4prxeoK2k3nS9pr5mpF4+AJySejy30X2N6PFPgQuA3SW1fwDWpvG1w4+JZhiAucAFafx9wH+k33/T9Zz+1lvS+EzgmfS3vwlYmerfBf6q0b0ODz7Sr0BE/KZk8nRg+NXwFcD9UbQNmC1pbt0bHCEifhwRQ2lyG8X3R0Cx3+6IOB4RLwP9FD8qo2EiYl9EjPYu7KbrNXnno0Ui4r+B4Y8WaRoR8RRwZER5BbAxjW8ErqxrU+OIiIMR8bM0/gawD5hHE/ac/tYH0+TMNARwMfBgqjdFr8Mc+hWSdIekV4Grga+k8jzg1ZLFDqRaM/ksxWcjMD36HdasvTZrXxMpRMTBNP4aUGhkM2OR1AqcT/EIuil7ljRD0k7gMLCF4jO/10sOtJrqMeHQH4Okn0jaPcqwAiAibo6I+cADwOrxt1Z7E/WblrkZGKLYc8OU06vVTxTPQTTdtduSWoAfAV8c8ey6qXqOiLcjYjHFZ9BLgA81uKVxNd1n7zSLiPizMhd9ANgM3EIDP2Zion4lXQv8OXBJ+oOBBvU7ifu2VLN+hEez9jWRQ5LmRsTBdArycKMbKiVpJsXAfyAiHkrlpu45Il6X9CTwUYqndk9OR/tN9ZjwkX4FJC0smVwB/CKN9wCfSVfxXAQcK3k62jCSlgF/B/xFRLxZMqsHWCnpVEkLgIXAs43osQzN2ut0/WiRHmBVGl8FPNLAXt5FkoD1wL6I+FbJrKbrWdLZw1fDSZpF8btC9gFPAp9MizVFr+9o9CvJ03GgeASyG9gF/CswL/7/lfx7KJ7T66Pk6pMG99tP8bzzzjR8t2TezanfF4DLm6DXT1A8B3ocOAQ80ay9lvS1nOIVJr8Ebm50P6P09wPgIPA/6b69HvhdYCvwIvAT4MxG91nS759QPHWzq+Qxu7wZewb+GPh56nU38JVU/wDFg5J+4F+AUxvd6/Dgj2EwM8uIT++YmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRv4PVCMY6yoPyiwAAAAASUVORK5CYII=\n", + 
"text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ - "### Goodness Metric\n", - "temp_goodness = sqrt( (noaa_min_temp - circle_min_temp)^2 + (noaa_max_temp - circle_max_temp)^2 )" + "noaa_max_temps.hist(bins = 20)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 319, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ - "temp_df['temp_goodness'] = round(np.sqrt(((temp_df['noaa_min_temp'] - temp_df['circle_min_temp'])**2) + ((temp_df['noaa_max_temp'] - temp_df['circle_max_temp'])**2)),2)" + "import random\n", + "import numpy\n", + "from matplotlib import pyplot\n", + "\n", + "x = cbc_max_temps.tolist()\n", + "y = noaa_max_temps.tolist()\n", + "\n", + "bins = numpy.linspace(-30, 30, 20)\n", + "\n", + "pyplot.hist(x, bins, alpha=0.5, label='CBC Max Temps')\n", + "pyplot.hist(y, bins, alpha=0.5, label='NOAA Max Temps')\n", + "pyplot.legend(loc='upper right')\n", + "pyplot.title(\"Histagram of CBC and NOAA Max Temps\")\n", + "pyplot.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Catagories\n", - "Values in catagories can be changed and then applied to dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "# Function to assign grade scores\n", - "def assign_grade(metric_score):\n", - " if metric_score <= excellent_score:\n", - " return 'excellent'\n", - " elif metric_score <= good_score:\n", - " return 'good'\n", - " elif metric_score <= fair_score:\n", - " return 'fair'\n", - " else:\n", - " return 'poor'" + "#### Preform a two sample T Test " ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 321, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Ttest_indResult(statistic=-4.415955625255944, pvalue=1.0088112774678262e-05)" + ] + }, + "execution_count": 321, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Applying the scores\n", - "temp_df['goodness_grade'] = temp_df['temp_goodness'].apply(lambda metric_score: assign_grade(metric_score))" + "scipy.stats.ttest_ind(cbc_max_temps.tolist(), noaa_max_temps.tolist(), equal_var=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - 
"---" + "# Goodness Metric for Max and Min Temp points " ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 334, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The goodness Metric of the closest stations were: \n", + "poor 59144\n", + "excellent 6003\n", + "good 2639\n", + "fair 604\n", + "Name: goodness_closest, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "poor 86.480480\n", + "excellent 8.777599\n", + "good 3.858751\n", + "fair 0.883170\n", + "Name: goodness_closest, dtype: float64\n" + ] + } + ], "source": [ - "## Ecosystem Split\n", - "Creating two dataframes based on matching min and macro ecosystems" + "# The Closest Stations' Goodness metric was, \n", + "print(\"The goodness Metric of the closest stations were: \")\n", + "print(circle_data.goodness_closest.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest stations as percentage: \")\n", + "print(circle_data.goodness_closest.value_counts(dropna = False) / circle_data.goodness_closest.size * 100)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 335, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The goodness Metric of the closest stations were: \n", + "poor 58929\n", + "excellent 6062\n", + "good 2535\n", + "fair 602\n", + "NaN 262\n", + "Name: goodness_lowest, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "poor 86.166106\n", + "excellent 8.863869\n", + "good 3.706682\n", + "fair 0.880246\n", + "NaN 0.383097\n", + "Name: goodness_lowest, dtype: float64\n" + ] + } + ], "source": [ - "#### Specific Ecosystem Match" + "# The Lowesr Stations' Goodness metric was, \n", + "print(\"The goodness Metric of the closest stations were: \")\n", + "print(circle_data.goodness_lowest.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest 
stations as percentage: \")\n", + "print(circle_data.goodness_lowest.value_counts(dropna = False) / circle_data.goodness_lowest.size * 100)\n" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 336, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows before specific ecosystem match: 16986\n", - "Number of rows after specific ecosystem match: 16160\n", - "\n", - "Number of rows lost: 826\n" + "The goodness Metric of ALL stations were: \n", + "poor 61828\n", + "NaN 3515\n", + "excellent 2040\n", + "good 809\n", + "fair 198\n", + "Name: goodness_mode, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "poor 90.405030\n", + "NaN 5.139640\n", + "excellent 2.982892\n", + "good 1.182921\n", + "fair 0.289516\n", + "Name: goodness_mode, dtype: float64\n" ] } ], "source": [ - "temp_df_specific_ecosystems = temp_df.loc[temp_df['specific_circle_ecosystem'].isna() == temp_df['specific_station_ecosystem'].isna()]\n", - "print(f'Number of rows before specific ecosystem match: {temp_df.shape[0]}')\n", - "print(f'Number of rows after specific ecosystem match: {temp_df_specific_ecosystems.shape[0]}')\n", - "print()\n", - "print(f'Number of rows lost: {temp_df.shape[0] - temp_df_specific_ecosystems.shape[0]}')" + "# The Mode of all Stations for each Circle was\n", + "print(\"The goodness Metric of ALL stations were: \")\n", + "print(circle_data.goodness_mode.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest stations as percentage: \")\n", + "print(circle_data.goodness_mode.value_counts(dropna = False) / circle_data.goodness_mode.size * 100)\n" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 337, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The goodness Metric of NLCD stations were: \n", + "poor 33962\n", + "NaN 29820\n", + "excellent 3104\n", + "good 1232\n", +
"fair 272\n", + "Name: n_goodness_mode, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "poor 49.659307\n", + "NaN 43.602866\n", + "excellent 4.538675\n", + "good 1.801433\n", + "fair 0.397719\n", + "Name: n_goodness_mode, dtype: float64\n" + ] + } + ], "source": [ - "#### Macro Ecosystem Match" + "# The Mode of all NLCD Stations for for each Circles were\n", + "print(\"The goodness Metric of NLCD stations were: \")\n", + "print(circle_data.n_goodness_mode.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest stations as percentage: \")\n", + "print(circle_data.n_goodness_mode.value_counts(dropna = False) / circle_data.n_goodness_mode.size * 100)\n" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 338, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Number of rows before macro ecosystem match: 16986\n", - "Number of rows after macro ecosystem match: 14798\n", - "\n", - "Number of rows lost: 2188\n" + "The goodness Metric of NLCD stations were: \n", + "NaN 36555\n", + "poor 27484\n", + "excellent 2945\n", + "good 1157\n", + "fair 249\n", + "Name: e_goodness_mode, dtype: int64\n", + "The goodness Metric of the closest stations as percentage: \n", + "NaN 53.450797\n", + "poor 40.187162\n", + "excellent 4.306185\n", + "good 1.691768\n", + "fair 0.364088\n", + "Name: e_goodness_mode, dtype: float64\n" ] } ], "source": [ - "temp_df_macro_ecosystems = temp_df.loc[temp_df['macro_circle_ecosystem'].isna() == temp_df['macro_station_ecosystem'].isna()]\n", - "print(f'Number of rows before macro ecosystem match: {temp_df.shape[0]}')\n", - "print(f'Number of rows after macro ecosystem match: {temp_df_macro_ecosystems.shape[0]}')\n", - "print()\n", - "print(f'Number of rows lost: {temp_df.shape[0] - temp_df_macro_ecosystems.shape[0]}')" + "# The Mode of all ECOSYS Stations for for each Circles were\n", + "print(\"The goodness Metric of NLCD stations 
were: \")\n", + "print(circle_data.e_goodness_mode.value_counts(dropna = False))\n", + "print(\"The goodness Metric of the closest stations as percentage: \")\n", + "print(circle_data.e_goodness_mode.value_counts(dropna = False) / circle_data.e_goodness_mode.size * 100)\n" ] }, { @@ -942,7 +4904,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 323, "metadata": {}, "outputs": [], "source": [ @@ -1035,9 +4997,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 324, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'temp_df_specific_ecosystems' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mgrade_figure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp_df_specific_ecosystems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'temp_df_specific_ecosystems' is not defined" + ] + } + ], "source": [ "grade_figure(temp_df_specific_ecosystems)" ] @@ -1057,6 +5031,27 @@ "source": [ "grade_figure(temp_df_macro_ecosystems)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -1075,7 +5070,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.7.1" } }, "nbformat": 4, From 029b793fe5096fa872cb9e734ba3acd2cfe0dcd3 Mon Sep 17 00:00:00 2001 From: ren Date: Thu, 8 
Oct 2020 19:26:26 -0400 Subject: [PATCH 2/2] # Moved agreement threshold to top of notebook --- notebooks/2.4-je-temperature-summary.ipynb | 117 +++++++++++++++++++-- 1 file changed, 107 insertions(+), 10 deletions(-) diff --git a/notebooks/2.4-je-temperature-summary.ipynb b/notebooks/2.4-je-temperature-summary.ipynb index 20c5f5e..4ebbcc6 100644 --- a/notebooks/2.4-je-temperature-summary.ipynb +++ b/notebooks/2.4-je-temperature-summary.ipynb @@ -115,6 +115,16 @@ "poor_score = 20" ] }, + { + "cell_type": "code", + "execution_count": 275, + "metadata": {}, + "outputs": [], + "source": [ + "AGREEMENT_THRESHOLD = 0.75\n", + "AGREEMENT_THRESHOLD = max(AGREEMENT_THRESHOLD, 1 - AGREEMENT_THRESHOLD)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -804,6 +814,103 @@ "raw_data.tail()" ] }, + { + "cell_type": "code", + "execution_count": 339, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['circle_name',\n", + " 'country_state',\n", + " 'lat',\n", + " 'lon',\n", + " 'count_year',\n", + " 'count_date',\n", + " 'n_field_counters',\n", + " 'n_feeder_counters',\n", + " 'min_field_parties',\n", + " 'max_field_parties',\n", + " 'field_hours',\n", + " 'feeder_hours',\n", + " 'nocturnal_hours',\n", + " 'field_distance',\n", + " 'nocturnal_distance',\n", + " 'distance_units',\n", + " 'min_temp',\n", + " 'max_temp',\n", + " 'temp_unit',\n", + " 'min_wind',\n", + " 'max_wind',\n", + " 'wind_unit',\n", + " 'min_snow',\n", + " 'max_snow',\n", + " 'snow_unit',\n", + " 'am_cloud',\n", + " 'pm_cloud',\n", + " 'field_distance_imperial',\n", + " 'field_distance_metric',\n", + " 'nocturnal_distance_imperial',\n", + " 'nocturnal_distance_metric',\n", + " 'min_snow_imperial',\n", + " 'min_snow_metric',\n", + " 'max_snow_metric',\n", + " 'max_snow_imperial',\n", + " 'min_temp_imperial',\n", + " 'max_temp_imperial',\n", + " 'min_temp_metric',\n", + " 'max_temp_metric',\n", + " 'min_wind_metric',\n", + " 'max_wind_metric',\n", + " 'min_wind_imperial',\n", + " 
'max_wind_imperial',\n", + " 'ui',\n", + " 'geohash_circle',\n", + " 'circle_id',\n", + " 'id',\n", + " 'latitude',\n", + " 'longitude',\n", + " 'elevation',\n", + " 'state',\n", + " 'name',\n", + " 'gsn_flag',\n", + " 'hcn_crn_flag',\n", + " 'wmoid',\n", + " 'geohash_station',\n", + " 'temp_min_value',\n", + " 'temp_max_value',\n", + " 'precipitation_value',\n", + " 'temp_avg',\n", + " 'snow',\n", + " 'snwd',\n", + " 'am_rain',\n", + " 'pm_rain',\n", + " 'am_snow',\n", + " 'pm_snow',\n", + " 'circle_elev',\n", + " 'elevation_source',\n", + " 'block_fips',\n", + " 'county_fips',\n", + " 'Ecosys_circle',\n", + " 'Usgsid_sys_circle',\n", + " 'Nlcd_code_circle',\n", + " 'Nlcd_circle',\n", + " 'Ecosys_station',\n", + " 'Usgsid_sys_station',\n", + " 'Nlcd_code_station',\n", + " 'Nlcd_station']" + ] + }, + "execution_count": 339, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(raw_data.columns)" + ] + }, { "cell_type": "code", "execution_count": 232, @@ -2123,16 +2230,6 @@ "circle_data = g.apply(temp_calc)" ] }, - { - "cell_type": "code", - "execution_count": 275, - "metadata": {}, - "outputs": [], - "source": [ - "AGREEMENT_THRESHOLD = 0.75\n", - "AGREEMENT_THRESHOLD = max(AGREEMENT_THRESHOLD, 1 - AGREEMENT_THRESHOLD)" - ] - }, { "cell_type": "markdown", "metadata": {},