diff --git a/21 Day Challenge/Day 1.ipynb b/21 Day Challenge/Day 1.ipynb new file mode 100644 index 0000000..e30e5f6 --- /dev/null +++ b/21 Day Challenge/Day 1.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge: Modify the lease agreement with your signature without changing the original lease variable." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "lease = '''Dear Dot, \n", + " This document validates that you are beholden to a monthly payment of rent for this house.\n", + " Rent is to be paid by the first of every month.\n", + " Fill in your signature to agree to these terms. \n", + " -------------\n", + " Please Sign Here: \n", + "'''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dear Dot, \n", + "This document validates that you are beholden to a monthly payment of rent for this house.\n", + "Rent is to be paid by the first of every month.\n", + "Fill in your signature to agree to these terms.\n", + "-------------\n", + "Please Sign Here: Dot \n", + "\n" + ] + } + ], + "source": [ + "# Solution\n", + "\n", + "signature = \"Dot\"\n", + "\n", + "new_lease = f'''\n", + "Dear Dot, \n", + "This document validates that you are beholden to a monthly payment of rent for this house.\n", + "Rent is to be paid by the first of every month.\n", + "Fill in your signature to agree to these terms.\n", + "-------------\n", + "Please Sign Here: {signature} \n", + "'''\n", + "print(new_lease)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 10.ipynb b/21 Day Challenge/Day 10.ipynb new file mode 100644 index 0000000..57e89be --- /dev/null +++ b/21 Day Challenge/Day 10.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "\n", + "Help Dot figure out how profitable selling fresh milk can be, by looking at the dataset for the cow farm. Fill in the values for the following columns based on the available data:\n", + "\n", + "Total Milk Production\n", + "Total Revenue\n", + "How much revenue did the cow farm make in the year 2020?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv('milk_32.csv')\n", + "df = df.drop(columns = ['Unnamed: 0'])\n", + " \n", + "df.head()\n", + "\n", + "#Month\tMonthly milk production: pounds per cow\tNumber of Cows\tTotal Milk Production\tPrice_Per_Pound\tTotal Revenue\n", + "#0\t07-Feb\t589.0\t30\tNaN\t0.22\tNaN\n", + "#1\t07-Mar\t561.0\t32\tNaN\t0.22\tNaN\n", + "#2\t07-Apr\t640.0\t35\tNaN\t0.22\tNaN\n", + "#3\t07-May\t656.0\t35\tNaN\t0.22\tNaN\n", + "#4\t07-Jun\t727.0\t35\tNaN\t0.22\tNaN" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df['Total Revenue'] = df['Total Milk Production'] * df['Price_Per_Pound']\n", + "df.tail(15)\n", + "\n", + "#Month\tMonthly milk production: pounds per cow\tNumber of Cows\tTotal Milk Production\tPrice_Per_Pound\tTotal Revenue\n", + "#153\t19-Nov\t812.0\t62\t50344.0\t0.32\t16110.08\n", + "#154\t19-Dec\t773.0\t62\t47926.0\t0.32\t15336.32\n", + "#155\t20-Jan\t813.0\t62\t50406.0\t0.32\t16129.92\n", + "#156\t20-Feb\t834.0\t62\t51708.0\t0.32\t16546.56\n", + "#157\t20-Mar\t782.0\t62\t48484.0\t0.32\t15514.88\n", + "#158\t20-Apr\t892.0\t62\t55304.0\t0.32\t17697.28\n", + "#159\t20-May\t903.0\t62\t55986.0\t0.32\t17915.52\n", + "#160\t20-Jun\t966.0\t62\t59892.0\t0.32\t19165.44\n", + "#161\t20-Jul\t937.0\t62\t58094.0\t0.32\t18590.08\n", + "#162\t20-Aug\t896.0\t62\t55552.0\t0.32\t17776.64\n", + "#163\t20-Sep\t858.0\t62\t53196.0\t0.32\t17022.72\n", + "#164\t20-Oct\t755.5\t62\t46841.0\t0.32\t14989.12\n", + "#165\t20-Nov\t755.5\t62\t46841.0\t0.32\t14989.12\n", + "#166\t20-Dec\t797.0\t62\t49414.0\t0.32\t15812.48\n", + "#167\t21-Jan\t843.0\t62\t52266.0\t0.32\t16725.12" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "revenue_2020 = df['Total Revenue'][155:167]\n", + "print(sum(revenue_2020))\n", + "\n", + "#202149.76" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_revenue_2020 = df[155:167]\n", + "df_revenue_2020\n", + "#df_revenue_2020['Total Revenue'].sum()\n", + "\n", + "\n", + "#Month\tMonthly milk production: pounds per cow\tNumber of Cows\tTotal Milk Production\tPrice_Per_Pound\tTotal Revenue\n", + "#155\t20-Jan\t813.0\t62\t50406.0\t0.32\t16129.92\n", + "#156\t20-Feb\t834.0\t62\t51708.0\t0.32\t16546.56\n", + "#157\t20-Mar\t782.0\t62\t48484.0\t0.32\t15514.88\n", + "#158\t20-Apr\t892.0\t62\t55304.0\t0.32\t17697.28\n", + 
"#159\t20-May\t903.0\t62\t55986.0\t0.32\t17915.52\n", + "#160\t20-Jun\t966.0\t62\t59892.0\t0.32\t19165.44\n", + "#161\t20-Jul\t937.0\t62\t58094.0\t0.32\t18590.08\n", + "#162\t20-Aug\t896.0\t62\t55552.0\t0.32\t17776.64\n", + "#163\t20-Sep\t858.0\t62\t53196.0\t0.32\t17022.72\n", + "#164\t20-Oct\t755.5\t62\t46841.0\t0.32\t14989.12\n", + "#165\t20-Nov\t755.5\t62\t46841.0\t0.32\t14989.12\n", + "#166\t20-Dec\t797.0\t62\t49414.0\t0.32\t15812.48" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 11.ipynb b/21 Day Challenge/Day 11.ipynb new file mode 100644 index 0000000..5ff24af --- /dev/null +++ b/21 Day Challenge/Day 11.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Can Dot spin a profit as an avocado farmer? Examine the data to find the average cost of avocados in Albany in 2017." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv('avocado.csv', index_col = 0)\n", + "df.head()\n", + "\n", + "#Date\tAveragePrice\tTotal Volume\tTotal Bags\tSmall Bags\tLarge Bags\tXLarge Bags\ttype\tyear\tregion\n", + "#0\t2015-12-27\t1.33\t64236.62\t8696.87\t8603.62\t93.25\t0.0\tconventional\t2015\tAlbany\n", + "#1\t2015-12-20\t1.35\t54876.98\t9505.56\t9408.07\t97.49\t0.0\tconventional\t2015\tAlbany\n", + "#2\t2015-12-13\t0.93\t118220.22\t8145.35\t8042.21\t103.14\t0.0\tconventional\t2015\tAlbany\n", + "#3\t2015-12-06\t1.08\t78992.15\t5811.16\t5677.40\t133.76\t0.0\tconventional\t2015\tAlbany\n", + "#4\t2015-11-29\t1.28\t51039.60\t6183.95\t5986.26\t197.69\t0.0\tconventional\t2015\tAlbany" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution\n", + "avo = df.groupby(['region','year']).mean(numeric_only=True)\n", + "avo.head(20)\n", + "\n", + "\n", + "#AveragePrice\tTotal Volume\tTotal Bags\tSmall Bags\tLarge Bags\tXLarge Bags\n", + "#region\tyear\t\t\t\t\t\t\n", + "#Albany\t2015\t1.538750\t38749.004135\t6919.765385\t6744.927404\t173.515865\t1.322115\n", + "#2016\t1.533942\t50618.611442\t8060.576058\t7629.006154\t222.983942\t208.585962\n", + "#2017\t1.637830\t49354.545094\t7882.128302\t4943.011226\t2763.058868\t176.067642\n", + "#2018\t1.435833\t64249.423750\t11949.729167\t9504.018750\t2323.395833\t122.314583\n", + "#Atlanta\t2015\t1.380577\t223381.712692\t28817.219904\t18091.291923\t10717.445673\t8.482308\n", + "#2016\t1.214135\t272373.829808\t95930.159423\t52369.293077\t43229.854712\t331.011635\n", + "#2017\t1.428774\t271840.754528\t110067.614811\t70132.168962\t38002.920189\t1932.516226\n", + "#2018\t1.288750\t342975.935417\t163317.456667\t111701.045417\t50035.261667\t1581.149583\n", + "#BaltimoreWashington\t2015\t1.368846\t390822.880192\t91233.794808\t88677.811635\t2522.575000\t33.408173\n", + 
"#2016\t1.587596\t393209.637692\t99574.801250\t96155.722885\t2591.820673\t827.257692\n", + "#2017\t1.679434\t386939.947736\t109661.628868\t104926.584717\t3672.241132\t1062.793585\n", + "#2018\t1.378333\t506620.958333\t159913.309167\t157196.139583\t2516.336667\t200.832917\n", + "#Boise\t2015\t1.373750\t36388.051346\t4981.101058\t4641.513846\t323.131346\t16.455865\n", + "#2016\t1.141923\t44745.283942\t22080.516346\t20934.522404\t1099.331250\t46.662692\n", + "#2017\t1.492642\t44910.955755\t19759.427075\t16523.018491\t3212.219057\t24.170660\n", + "#2018\t1.492500\t50614.982083\t20437.698750\t11107.702500\t9274.874583\t55.121667\n", + "#Boston\t2015\t1.473558\t263990.304231\t55372.257788\t54624.526154\t746.329423\t1.402212\n", + "#2016\t1.426154\t293954.952596\t66656.696154\t65671.839423\t635.623365\t349.233365\n", + "#2017\t1.679528\t288779.926038\t66325.052830\t56744.661698\t9218.595472\t361.776792\n", + "#2018\t1.576667\t359875.248333\t73917.082917\t57694.640833\t15803.044167\t419.397917" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "avo.loc[('Albany', 2017)]\n", + "\n", + "#AveragePrice 1.637830\n", + "#Total Volume 49354.545094\n", + "#Total Bags 7882.128302\n", + "#Small Bags 4943.011226\n", + "#Large Bags 2763.058868\n", + "#XLarge Bags 176.067642\n", + "#Name: (Albany, 2017), dtype: float64" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 12.ipynb b/21 Day Challenge/Day 12.ipynb new file mode 100644 index 0000000..3937e38 --- /dev/null +++ b/21 Day Challenge/Day 12.ipynb @@ -0,0 +1,100 @@ 
+{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Examining the numbers, Dot understands that the prices of both conventional and organic avocados rise and fall frequently. No matter how they grow the avocados, they don't want to sell them for less than $2.\n", + "\n", + "Looking at recent years, Dot needs you to help them find: in which year or years did both conventional and organic avocados have had average prices above $2?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv('avocado.csv', index_col = 0)\n", + "df.head()\n", + "\n", + "#\tDate\tAveragePrice\tTotal Volume\tTotal Bags\tSmall Bags\tLarge Bags\tXLarge Bags\ttype\tyear\tregion\n", + "#0\t2015-12-27\t1.33\t64236.62\t8696.87\t8603.62\t93.25\t0.0\tconventional\t2015\tAlbany\n", + "#1\t2015-12-20\t1.35\t54876.98\t9505.56\t9408.07\t97.49\t0.0\tconventional\t2015\tAlbany\n", + "#2\t2015-12-13\t0.93\t118220.22\t8145.35\t8042.21\t103.14\t0.0\tconventional\t2015\tAlbany\n", + "#3\t2015-12-06\t1.08\t78992.15\t5811.16\t5677.40\t133.76\t0.0\tconventional\t2015\tAlbany\n", + "#4\t2015-11-29\t1.28\t51039.60\t6183.95\t5986.26\t197.69\t0.0\tconventional\t2015\tAlbany" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution\n", + "user_filter = df['AveragePrice'] >= 2\n", + "\n", + "filtered_df = df[user_filter]\n", + "\n", + "filtered_df\n", + "\n", + "\n", + "#\tDate\tAveragePrice\tTotal Volume\tTotal Bags\tSmall Bags\tLarge Bags\tXLarge Bags\ttype\tyear\tregion\n", + "#7\t2016-11-06\t2.07\t376476.71\t59042.39\t57803.54\t1095.69\t143.16\tconventional\t2016\tChicago\n", + "#8\t2016-10-30\t2.07\t375213.57\t54407.20\t48140.45\t1587.11\t4679.64\tconventional\t2016\tChicago\n", + "#7\t2016-11-06\t2.07\t492504.81\t74657.63\t74025.26\t103.20\t529.17\tconventional\t2016\tSanFrancisco\n", + 
"#8\t2016-10-30\t2.20\t477937.80\t59465.15\t57223.44\t269.21\t1972.50\tconventional\t2016\tSanFrancisco\n", + "#10\t2017-10-22\t2.06\t519814.03\t64007.37\t59543.29\t4441.86\t22.22\tconventional\t2017\tChicago\n", + "#...\t...\t...\t...\t...\t...\t...\t...\t...\t...\t...\n", + "#6\t2018-02-11\t2.22\t21708.65\t2533.33\t2533.33\t0.00\t0.00\torganic\t2018\tSanFrancisco\n", + "#8\t2018-01-28\t2.27\t20325.75\t2148.89\t2148.89\t0.00\t0.00\torganic\t2018\tSanFrancisco\n", + "#11\t2018-01-07\t2.30\t20151.24\t1943.34\t1943.34\t0.00\t0.00\torganic\t2018\tSanFrancisco\n", + "#9\t2018-01-21\t2.02\t33986.68\t20402.90\t1469.79\t18905.41\t27.70\torganic\t2018\tSeattle\n", + "#10\t2018-01-14\t2.03\t36228.45\t16701.50\t1630.19\t15063.98\t7.33\torganic\t2018\tSeattle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filtered_df.groupby(['year','type']).count()\n", + "\n", + "#Date\tAveragePrice\tTotal Volume\tTotal Bags\tSmall Bags\tLarge Bags\tXLarge Bags\tregion\n", + "#year\ttype\t\t\t\t\t\t\t\t\n", + "#2015\torganic\t295\t295\t295\t295\t295\t295\t295\t295\n", + "#2016\tconventional\t4\t4\t4\t4\t4\t4\t4\t4\n", + "#organic\t371\t371\t371\t371\t371\t371\t371\t371\n", + "#2017\tconventional\t19\t19\t19\t19\t19\t19\t19\t19\n", + "#organic\t731\t731\t731\t731\t731\t731\t731\t731\n", + "#2018\torganic\t24\t24\t24\t24\t24\t24\t24\t24" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 13.ipynb b/21 Day Challenge/Day 13.ipynb new file mode 100644 index 0000000..1f13543 --- /dev/null +++ b/21 Day Challenge/Day 13.ipynb @@ 
-0,0 +1,123 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Use the pandas sort function and the pandas filter function from the previous challenge to answer these questions:\n", + "\n", + "Which wines had a quality of 8 or higher and a residual sugar level above 5?\n", + "How many wines in total had a quality of 8 and 7 and a citric acid level below 0.4?\n", + "Note: Use the index positions of the wines as the wine names." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "wine_df = pd.read_csv('winequality-red.csv')\n", + "wine_df\n", + "\n", + "#\tfixed acidity\tvolatile acidity\tcitric acid\tresidual sugar\tchlorides\tfree sulfur dioxide\ttotal sulfur dioxide\tdensity\tpH\tsulphates\talcohol\tquality\n", + "#0\t7.4\t0.700\t0.00\t1.9\t0.076\t11.0\t34.0\t0.99780\t3.51\t0.56\t9.4\t5\n", + "#1\t7.8\t0.880\t0.00\t2.6\t0.098\t25.0\t67.0\t0.99680\t3.20\t0.68\t9.8\t5\n", + "#2\t7.8\t0.760\t0.04\t2.3\t0.092\t15.0\t54.0\t0.99700\t3.26\t0.65\t9.8\t5\n", + "#3\t11.2\t0.280\t0.56\t1.9\t0.075\t17.0\t60.0\t0.99800\t3.16\t0.58\t9.8\t6\n", + "#4\t7.4\t0.700\t0.00\t1.9\t0.076\t11.0\t34.0\t0.99780\t3.51\t0.56\t9.4\t5\n", + "#...\t...\t...\t...\t...\t...\t...\t...\t...\t...\t...\t...\t...\n", + "#1594\t6.2\t0.600\t0.08\t2.0\t0.090\t32.0\t44.0\t0.99490\t3.45\t0.58\t10.5\t5\n", + "#1595\t5.9\t0.550\t0.10\t2.2\t0.062\t39.0\t51.0\t0.99512\t3.52\t0.76\t11.2\t6\n", + "#1596\t6.3\t0.510\t0.13\t2.3\t0.076\t29.0\t40.0\t0.99574\t3.42\t0.75\t11.0\t6\n", + "#1597\t5.9\t0.645\t0.12\t2.0\t0.075\t32.0\t44.0\t0.99547\t3.57\t0.71\t10.2\t5\n", + "#1598\t6.0\t0.310\t0.47\t3.6\t0.067\t18.0\t42.0\t0.99549\t3.39\t0.66\t11.0\t6\n", + "#1599 rows × 12 columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution \n", + "#Q1\n", + "filter_wine = wine_df['residual sugar'] > 5.00\n", + "\n", + 
"filtered_df = wine_df[filter_wine]\n", + "\n", + "filtered_df.sort_values(by=['quality','residual sugar'], ascending = False)\n", + "\n", + "#\tfixed acidity\tvolatile acidity\tcitric acid\tresidual sugar\tchlorides\tfree sulfur dioxide\ttotal sulfur dioxide\tdensity\tpH\tsulphates\talcohol\tquality\n", + "#278\t10.3\t0.320\t0.45\t6.4\t0.073\t5.0\t13.0\t0.99760\t3.23\t0.82\t12.6\t8\n", + "#455\t11.3\t0.620\t0.67\t5.2\t0.086\t6.0\t19.0\t0.99880\t3.22\t0.69\t13.4\t8\n", + "#1043\t9.5\t0.390\t0.41\t8.9\t0.069\t18.0\t39.0\t0.99859\t3.29\t0.81\t10.9\t7\n", + "#1079\t7.9\t0.300\t0.68\t8.3\t0.050\t37.5\t278.0\t0.99316\t3.01\t0.51\t12.3\t7\n", + "#1081\t7.9\t0.300\t0.68\t8.3\t0.050\t37.5\t289.0\t0.99316\t3.01\t0.51\t12.3\t7\n", + "#...\t...\t...\t...\t...\t...\t...\t...\t...\t...\t...\t...\t...\n", + "#57\t7.5\t0.630\t0.12\t5.1\t0.111\t50.0\t110.0\t0.99830\t3.26\t0.77\t9.4\t5\n", + "#1235\t6.0\t0.330\t0.32\t12.9\t0.054\t6.0\t113.0\t0.99572\t3.30\t0.56\t11.5\t4\n", + "#1276\t8.5\t0.400\t0.40\t6.3\t0.050\t3.0\t10.0\t0.99566\t3.28\t0.56\t12.0\t4\n", + "#1176\t6.5\t0.880\t0.03\t5.6\t0.079\t23.0\t47.0\t0.99572\t3.58\t0.50\t11.2\t4\n", + "#1478\t7.1\t0.875\t0.05\t5.7\t0.082\t3.0\t14.0\t0.99808\t3.40\t0.52\t10.2\t3\n", + "#84 rows × 12 columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Q2\n", + "filter_wine_2 = wine_df['citric acid'] < 0.4\n", + "\n", + "filtered_df_2 = wine_df[filter_wine_2]\n", + "filtered_df_2['quality'].value_counts()\n", + "\n", + "#5 534\n", + "#6 436\n", + "#7 96\n", + "#4 45\n", + "#8 9\n", + "#3 7\n", + "#Name: quality, dtype: int64" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(96+9)\n", + "\n", + "#105" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 
+ }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 14.ipynb b/21 Day Challenge/Day 14.ipynb new file mode 100644 index 0000000..03105f7 --- /dev/null +++ b/21 Day Challenge/Day 14.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Dot's neighbour said that he only likes wine from Stellenbosch, Bordeaux, and the Okanagan Valley, and that the sulfates can't be that high. The problem is, Dot can't really afford to spend tons of money on the wine. Dot's conditions for searching for wine are:\n", + "\n", + "Sulfates cannot be higher than 0.6.\n", + "The price has to be less than $20.\n", + "Use the above conditions to filter the data for questions 2 and 3 below.\n", + "\n", + "Questions:\n", + "\n", + "Where is Stellenbosch, anyway? How many wines from Stellenbosch are there in the entire dataset?\n", + "After filtering with the 2 conditions, what is the average price of wine from the Bordeaux region?\n", + "After filtering with the 2 conditions, what is the least expensive wine that's of the highest quality from the Okanagan Valley?\n", + "Stretch Question:\n", + "\n", + "What is the average price of wine from Stellenbosch, according to the entire unfiltered dataset?\n", + "Note: Check the dataset to see if there are missing values; if there are, fill in missing values with the mean." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df = pd.read_csv('winequality-red_2.csv')\n", + "df = df.drop(columns = ['Unnamed: 0'])\n", + "\n", + "df.head()\n", + "\n", + "#fixed acidity\tvolatile acidity\tcitric acid\tresidual sugar\tchlorides\tfree sulfur dioxide\ttotal sulfur dioxide\tdensity\tpH\tsulphates\talcohol\tquality\tregion\tprice\n", + "#0\t7.4\t0.70\t0.00\t1.9\t0.076\t11.0\t34.0\t0.9978\t3.51\t0.56\t9.4\t5\tColchagua Valley\t64\n", + "#1\t7.8\t0.88\t0.00\t2.6\t0.098\t25.0\t67.0\t0.9968\t3.20\t0.68\t9.8\t5\tBordeaux\t89\n", + "#2\t7.8\t0.76\t0.04\t2.3\t0.092\t15.0\t54.0\t0.9970\t3.26\t0.65\t9.8\t5\tLa Rjoja\t25\n", + "#3\t11.2\t0.28\t0.56\t1.9\t0.075\t17.0\t60.0\t0.9980\t3.16\t0.58\t9.8\t6\tWillamette\t27\n", + "#4\t7.4\t0.70\t0.00\t1.9\t0.076\t11.0\t34.0\t0.9978\t3.51\t0.56\t9.4\t5\tMarlborough\t9\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solutions\n", + "#Q1\n", + "df['region'].value_counts()\n", + "\n", + "#La Rjoja 341\n", + "#Bordeaux 264\n", + "#Colchagua Valley 260\n", + "#Okanagan Valley 256\n", + "#Willamette 233\n", + "#Marlborough 210\n", + "#Stellenbosch 35\n", + "#Name: region, dtype: int64" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Q2\n", + "filter_sulphates = df['sulphates'] <= 0.6\n", + "filtered_df = df[filter_sulphates]\n", + "\n", + "filter_price = filtered_df['price'] < 20\n", + "filtered_df = filtered_df[filter_price]\n", + "\n", + "filtered_df.groupby(['region']).mean()\n", + "#Answer is $11.714\n", + "\n", + "#\tfixed acidity\tvolatile acidity\tcitric acid\tresidual sugar\tchlorides\tfree sulfur dioxide\ttotal sulfur dioxide\tdensity\tpH\tsulphates\talcohol\tquality\tprice\n", + "#region\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + 
"#Bordeaux\t7.680000\t0.614250\t0.210000\t2.785000\t0.082800\t12.350000\t34.350000\t0.996007\t3.328500\t0.550000\t10.295000\t5.550000\t11.300000\n", + "#Colchagua Valley\t8.250000\t0.659000\t0.250000\t2.070000\t0.096400\t9.400000\t44.100000\t0.996521\t3.285000\t0.555000\t10.150000\t5.000000\t13.200000\n", + "#La Rjoja\t8.424138\t0.634483\t0.244483\t3.415517\t0.085724\t18.706897\t62.051724\t0.997101\t3.276552\t0.545517\t10.196552\t5.241379\t12.241379\n", + "#Marlborough\t7.100000\t0.764000\t0.106000\t2.366667\t0.081667\t18.800000\t45.000000\t0.995860\t3.460667\t0.548667\t10.380000\t5.333333\t9.333333\n", + "#Okanagan Valley\t7.826667\t0.537000\t0.171333\t2.303333\t0.075733\t16.733333\t48.466667\t0.995837\t3.320667\t0.542667\t10.746667\t5.533333\t10.400000\n", + "#Stellenbosch\t8.666667\t0.336667\t0.356667\t5.866667\t0.064000\t18.000000\t71.666667\t0.996373\t3.226667\t0.543333\t10.200000\t4.333333\t17.333333\n", + "#Willamette\t7.600000\t0.616765\t0.155882\t2.282353\t0.078353\t13.529412\t40.176471\t0.996150\t3.374706\t0.551176\t10.505882\t5.411765\t11.647059" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Q3\n", + "filter_region = filtered_df['region'] == 'Okanagan Valley'\n", + "okanagan_df = filtered_df[filter_region]\n", + "okanagan_df.sort_values(by=['quality', 'price'], ascending = [False,True])\n", + "\n", + "#fixed acidity\tvolatile acidity\tcitric acid\tresidual sugar\tchlorides\tfree sulfur dioxide\ttotal sulfur dioxide\tdensity\tpH\tsulphates\talcohol\tquality\tregion\tprice\n", + "#1025\t8.6\t0.830\t0.00\t2.80\t0.095\t17.0\t43.0\t0.99822\t3.33\t0.60\t10.4\t6\tOkanagan Valley\t4\n", + "#1017\t8.0\t0.180\t0.37\t0.90\t0.049\t36.0\t109.0\t0.99007\t2.89\t0.44\t12.7\t6\tOkanagan Valley\t5\n", + "#1127\t6.3\t0.760\t0.00\t2.90\t0.072\t26.0\t52.0\t0.99379\t3.51\t0.60\t11.5\t6\tOkanagan Valley\t5\n", + "#1490\t7.1\t0.220\t0.49\t1.80\t0.039\t8.0\t18.0\t0.99344\t3.39\t0.56\t12.4\t6\tOkanagan Valley\t5\n", + 
"#884\t8.8\t0.610\t0.19\t4.00\t0.094\t30.0\t69.0\t0.99787\t3.22\t0.50\t10.0\t6\tOkanagan Valley\t9\n", + "#788\t10.0\t0.560\t0.24\t2.20\t0.079\t19.0\t58.0\t0.99910\t3.18\t0.56\t10.1\t6\tOkanagan Valley\t13\n", + "#300\t7.5\t0.530\t0.06\t2.60\t0.086\t20.0\t44.0\t0.99650\t3.38\t0.59\t10.7\t6\tOkanagan Valley\t17\n", + "#999\t6.4\t0.690\t0.00\t1.65\t0.055\t7.0\t12.0\t0.99162\t3.47\t0.53\t12.9\t6\tOkanagan Valley\t18\n", + "#189\t7.9\t0.490\t0.32\t1.90\t0.082\t17.0\t144.0\t0.99680\t3.20\t0.55\t9.5\t5\tOkanagan Valley\t4\n", + "#725\t9.0\t0.660\t0.17\t3.00\t0.077\t5.0\t13.0\t0.99760\t3.29\t0.55\t10.4\t5\tOkanagan Valley\t5\n", + "#5\t7.4\t0.660\t0.00\t1.80\t0.075\t13.0\t40.0\t0.99780\t3.51\t0.56\t9.4\t5\tOkanagan Valley\t10\n", + "#1272\t5.9\t0.460\t0.00\t1.90\t0.077\t25.0\t44.0\t0.99385\t3.50\t0.53\t11.2\t5\tOkanagan Valley\t11\n", + "#252\t11.1\t0.350\t0.48\t3.10\t0.090\t5.0\t21.0\t0.99860\t3.17\t0.53\t10.5\t5\tOkanagan Valley\t15\n", + "#1355\t6.1\t0.320\t0.25\t1.80\t0.086\t5.0\t32.0\t0.99464\t3.36\t0.44\t10.1\t5\tOkanagan Valley\t17\n", + "#1553\t7.3\t0.735\t0.00\t2.20\t0.080\t18.0\t28.0\t0.99765\t3.41\t0.60\t9.4\t5\tOkanagan Valley\t18" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 15.ipynb b/21 Day Challenge/Day 15.ipynb new file mode 100644 index 0000000..1dfc0cc --- /dev/null +++ b/21 Day Challenge/Day 15.ipynb @@ -0,0 +1,93 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dot already has a few seeds they can use for their garden. They need to figure out which of the seeds will produce the biggest potential harvest. 
Can you help Dot decide which seeds are best, by using data visualization?\n", + "\n", + "Create a bar graph with Matplotlib that shows each vegetable and the size of the potential harvest that Dot can expect from them.\n", + "\n", + "Which of Dot's seeds will produce the largest harvest?\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "seeds = {\n", + " 'Vegetable' : ['Carrots', 'Tomatoes', 'Potatoes', 'Eggplant', 'Cucumbers'],\n", + " 'Seeds_Count' : [300,10,90,100,15],\n", + " 'Each_Seed_Produces': [1,140,10,5, 90]\n", + "}\n", + "\n", + "df = pd.DataFrame(seeds)\n", + "df\n", + "\n", + "#Vegetable\tSeeds_Count\tEach_Seed_Produces\n", + "#0\tCarrots\t300\t1\n", + "#1\tTomatoes\t10\t140\n", + "#2\tPotatoes\t90\t10\n", + "#3\tEggplant\t100\t5\n", + "#4\tCucumbers\t15\t90" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution\n", + "df['potential_harvest'] = df.Seeds_Count * df.Each_Seed_Produces\n", + "df\n", + "\n", + "#Vegetable\tSeeds_Count\tEach_Seed_Produces\tpotential_harvest\n", + "#0\tCarrots\t300\t1\t300\n", + "#1\tTomatoes\t10\t140\t1400\n", + "#2\tPotatoes\t90\t10\t900\n", + "#3\tEggplant\t100\t5\t500\n", + "#4\tCucumbers\t15\t90\t1350" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "plt.bar(x = df['Vegetable'], height = df['potential_harvest'])\n", + "plt.title('Potential Harvest')\n", + "plt.show()\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 16.ipynb b/21 Day Challenge/Day 16.ipynb new file mode 100644 index 0000000..cfa9985 --- /dev/null +++ b/21 Day Challenge/Day 16.ipynb @@ -0,0 +1,125 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Create a boxplot to answer the following questions:\n", + "\n", + "How many books have over 4000 pages?\n", + "\n", + "Note: Do not use a fitler, use a boxplot.\n", + "\n", + "What are the average ratings for books that have over 4000 pages?\n", + "\n", + "Note: You can use a filter for question 2." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"books.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution\n", + "df.head(2)\n", + "\n", + "#bookID\ttitle\tauthors\taverage_rating\tisbn\tisbn13\tlanguage_code\tnum_pages\tratings_count\ttext_reviews_count\tpublication_date\tpublisher\n", + "#0\t1\tHarry Potter and the Half-Blood Prince (Harry ...\tJ.K. Rowling/Mary GrandPré\t4.57\t0439785960\t9780439785969\teng\t652\t2095690\t27591\t9/16/2006\tScholastic Inc.\n", + "#1\t2\tHarry Potter and the Order of the Phoenix (Har...\tJ.K. Rowling/Mary GrandPré\t4.49\t0439358078\t9780439358071\teng\t870\t2153167\t29221\t9/1/2004\tScholastic Inc." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.columns\n", + "\n", + "#Index(['bookID', 'title', 'authors', 'average_rating', 'isbn', 'isbn13',\n", + "# 'language_code', 'num_pages', 'ratings_count', 'text_reviews_count',\n", + "# 'publication_date', 'publisher'],\n", + "# dtype='object')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize = (14,4))\n", + "plt.boxplot(df['num_pages'], vert = False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filter_pages = df['num_pages'] > 3000\n", + "df_2 = df[filter_pages]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_2.sort_values(by = 'num_pages', ascending = False)\n", + "\n", + "#\tbookID\ttitle\tauthors\taverage_rating\tisbn\tisbn13\tlanguage_code\tnum_pages\tratings_count\ttext_reviews_count\tpublication_date\tpublisher\n", + "#6497\t24520\tThe Complete Aubrey/Maturin Novels (5 Volumes)\tPatrick O'Brian\t4.70\t039306011X\t9780393060119\teng\t6576\t1338\t81\t10/17/2004\tW. W. Norton Company\n", + "#6802\t25587\tThe Second World War\tWinston S. Churchill/John Keegan\t4.45\t039541685X\t9780395416853\teng\t4736\t1493\t99\t5/9/1986\tMariner Books\n", + "#10906\t44613\tRemembrance of Things Past (Boxed Set)\tMarcel Proust/C.K. Scott Moncrieff/Frederick A...\t4.34\t0701125594\t9780701125592\teng\t3400\t6\t1\t3/5/1981\tChatto & Windus\n", + "#6\t10\tHarry Potter Collection (Harry Potter #1-6)\tJ.K. 
Rowling\t4.73\t0439827604\t9780439827607\teng\t3342\t28242\t808\t9/12/2005\tScholastic\n", + "#6822\t25709\tSumma Theologica 5 Vols\tThomas Aquinas\t4.12\t0870610635\t9780870610639\teng\t3020\t2734\t84\t1/1/1981\tChristian Classics" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 17.ipynb b/21 Day Challenge/Day 17.ipynb new file mode 100644 index 0000000..25fb239 --- /dev/null +++ b/21 Day Challenge/Day 17.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Help Dot by answering the following questions using a bar plot:\n", + "\n", + "What are the top 5 rated books in the dataset?\n", + "\n", + "What are the top 5 books with the highest average rating?\n", + "\n", + "Note: Filter out books that have a low ratings_count; for question 2, filter out books with a ratings_count less than the mean.\n", + "\n", + "Stretch\n", + "\n", + "As an optional bonus question, try answering this as well:\n", + "\n", + "What are the top 5 authors with the most books in the dataset?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "df = pd.read_csv(\"books.csv\")\n", + "\n", + "df.head(2)\n", + "\n", + "#bookID\ttitle\tauthors\taverage_rating\tisbn\tisbn13\tlanguage_code\tnum_pages\tratings_count\ttext_reviews_count\tpublication_date\tpublisher\n", + "#0\t1\tHarry Potter and the Half-Blood Prince (Harry ...\tJ.K.
Rowling/Mary GrandPré\t4.57\t0439785960\t9780439785969\teng\t652\t2095690\t27591\t9/16/2006\tScholastic Inc.\n", + "#1\t2\tHarry Potter and the Order of the Phoenix (Har...\tJ.K. Rowling/Mary GrandPré\t4.49\t0439358078\t9780439358071\teng\t870\t2153167\t29221\t9/1/2004\tScholastic Inc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution\n", + "top_rated = df.sort_values('ratings_count', ascending = False).head(5)\n", + "top_rated\n", + "\n", + "#bookID\ttitle\tauthors\taverage_rating\tisbn\tisbn13\tlanguage_code\tnum_pages\tratings_count\ttext_reviews_count\tpublication_date\tpublisher\n", + "#10336\t41865\tTwilight (Twilight #1)\tStephenie Meyer\t3.59\t0316015849\t9780316015844\teng\t501\t4597666\t94265\t9/6/2006\tLittle Brown and Company\n", + "#1697\t5907\tThe Hobbit or There and Back Again\tJ.R.R. Tolkien\t4.27\t0618260307\t9780618260300\teng\t366\t2530894\t32871\t8/15/2002\tHoughton Mifflin\n", + "#1462\t5107\tThe Catcher in the Rye\tJ.D. Salinger\t3.80\t0316769177\t9780316769174\teng\t277\t2457092\t43499\t1/30/2001\tBack Bay Books\n", + "#307\t960\tAngels & Demons (Robert Langdon #1)\tDan Brown\t3.89\t1416524797\t9781416524793\teng\t736\t2418736\t21303\t4/1/2006\tPocket Books\n", + "#3\t5\tHarry Potter and the Prisoner of Azkaban (Harr...\tJ.K. Rowling/Mary GrandPré\t4.56\t043965548X\t9780439655484\teng\t435\t2339585\t36325\t5/1/2004\tScholastic Inc." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "plt.barh(top_rated['title'], top_rated['ratings_count'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_ratings_filter = df.ratings_count > df.ratings_count.mean()\n", + "top_ratings = df[top_ratings_filter]\n", + "top_ratings = top_ratings.sort_values('average_rating', ascending = False).head()\n", + "top_ratings\n", + "\n", + "#bookID\ttitle\tauthors\taverage_rating\tisbn\tisbn13\tlanguage_code\tnum_pages\tratings_count\ttext_reviews_count\tpublication_date\tpublisher\n", + "#6587\t24812\tThe Complete Calvin and Hobbes\tBill Watterson\t4.82\t0740748475\t9780740748479\teng\t1456\t32213\t930\t9/6/2005\tAndrews McMeel Publishing\n", + "#4\t8\tHarry Potter Boxed Set Books 1-5 (Harry Potte...\tJ.K. Rowling/Mary GrandPré\t4.78\t0439682584\t9780439682589\teng\t2690\t41428\t164\t9/13/2004\tScholastic\n", + "#6589\t24814\tIt's a Magical World (Calvin and Hobbes #11)\tBill Watterson\t4.76\t0836221362\t9780836221367\teng\t176\t23875\t303\t9/1/1996\tAndrews McMeel Publishing\n", + "#6\t10\tHarry Potter Collection (Harry Potter #1-6)\tJ.K. 
Rowling\t4.73\t0439827604\t9780439827607\teng\t3342\t28242\t808\t9/12/2005\tScholastic\n", + "#6591\t24818\tThe Days Are Just Packed\tBill Watterson\t4.69\t0836217357\t9780836217353\teng\t176\t20308\t244\t9/1/1993\tAndrews McMeel Publishing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "plt.barh(top_ratings['title'], top_ratings['average_rating'])\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 18.ipynb b/21 Day Challenge/Day 18.ipynb new file mode 100644 index 0000000..aadde46 --- /dev/null +++ b/21 Day Challenge/Day 18.ipynb @@ -0,0 +1,111 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "What are the top 5 boardgame categories in this dataset that are not targeted for young children?\n", + "Note: For the question above, use a filter to acquire boardgames with an intended age of 13+; there is an age column in our dataset.\n", + "Which categories of boardgames that are not targeted for young children are the same compared to the top 5 boardgames categories in the overall dataset?"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('boardgames.csv')\n", + "df.head(2)\n", + "\n", + "#rank\tbgg_url\tgame_id\tnames\tmin_players\tmax_players\tavg_time\tmin_time\tmax_time\tyear\tavg_rating\tgeek_rating\tnum_votes\timage_url\tage\tmechanic\towned\tcategory\tdesigner\tweight\n", + "#0\t1\thttps://boardgamegeek.com/boardgame/174430/glo...\t174430\tGloomhaven\t1\t4\t120\t60\t120\t2017\t8.98893\t8.61858\t15376\thttps://cf.geekdo-images.com/original/img/lDN3...\t12\tAction / Movement Programming, Co-operative Pl...\t25928\tAdventure, Exploration, Fantasy, Fighting, Min...\tIsaac Childres\t3.7543\n", + "#1\t2\thttps://boardgamegeek.com/boardgame/161936/pan...\t161936\tPandemic Legacy: Season 1\t2\t4\t60\t60\t60\t2015\t8.66140\t8.50163\t26063\thttps://cf.geekdo-images.com/original/img/P_Sw...\t13\tAction Point Allowance System, Co-operative Pl...\t41605\tEnvironmental, Medical\tRob Daviau, Matt Leacock\t2.8210" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.category.value_counts().head(10).index\n", + "\n", + "#Index(['Card Game', 'Abstract Strategy', 'Card Game, Fantasy', 'Economic',\n", + "# 'Wargame, World War II', 'Dice', 'Fantasy', 'City Building', 'none',\n", + "# 'Card Game, Fantasy, Fighting'],\n", + "# dtype='object')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize = (14,7))\n", + "plt.bar(df.category.value_counts().head().index ,height = df.category.value_counts().head(), color = 'red')\n", + "plt.title('This is a bar plot!', fontsize =14) #Specifying a title\n", + "plt.xlabel('This is the x axis!', fontsize = 14)\n", + 
"plt.xticks(rotation = 'vertical')\n", + "plt.ylabel('This it the y axis!', fontsize = 14)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "age_filter = df.age >= 13\n", + "df_2 = df[age_filter]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize = (14,7))\n", + "plt.bar(df_2.category.value_counts().head(5).index ,height = df_2.category.value_counts().head(5), color = 'red')\n", + "plt.title('Top 5, for above 13!', fontsize =14) #Specifying a title\n", + "plt.xticks(rotation = 'vertical')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 19.ipynb b/21 Day Challenge/Day 19.ipynb new file mode 100644 index 0000000..07ddb86 --- /dev/null +++ b/21 Day Challenge/Day 19.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "What type of distribution does the column avg_time have?\n", + "\n", + "Do games that have a great avg_rating have longer play times?\n", + "\n", + "Note: For question 2, filter for games that have an avg_rating of 9.0 or above.\n", + "\n", + "Stretch\n", + "\n", + "As an optional bonus question, try answering:\n", + "\n", + "What type of distribution does weight have?\n", + "\n", + "What happens to the median and mean of a skewed distribution?"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('boardgames.csv')\n", + "df.head(3)\n", + "\n", + "#rank\tbgg_url\tgame_id\tnames\tmin_players\tmax_players\tavg_time\tmin_time\tmax_time\tyear\tavg_rating\tgeek_rating\tnum_votes\timage_url\tage\tmechanic\towned\tcategory\tdesigner\tweight\n", + "#0\t1\thttps://boardgamegeek.com/boardgame/174430/glo...\t174430\tGloomhaven\t1\t4\t120\t60\t120\t2017\t8.98893\t8.61858\t15376\thttps://cf.geekdo-images.com/original/img/lDN3...\t12\tAction / Movement Programming, Co-operative Pl...\t25928\tAdventure, Exploration, Fantasy, Fighting, Min...\tIsaac Childres\t3.7543\n", + "#1\t2\thttps://boardgamegeek.com/boardgame/161936/pan...\t161936\tPandemic Legacy: Season 1\t2\t4\t60\t60\t60\t2015\t8.66140\t8.50163\t26063\thttps://cf.geekdo-images.com/original/img/P_Sw...\t13\tAction Point Allowance System, Co-operative Pl...\t41605\tEnvironmental, Medical\tRob Daviau, Matt Leacock\t2.8210\n", + "#2\t3\thttps://boardgamegeek.com/boardgame/182028/thr...\t182028\tThrough the Ages: A New Story of Civilization\t2\t4\t240\t180\t240\t2015\t8.60673\t8.30183\t12352\thttps://cf.geekdo-images.com/original/img/1d2h...\t14\tAction Point Allowance System, Auction/Bidding...\t15848\tCard Game, Civilization, Economic\tVlaada Chvátil\t4.3678\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution\n", + "avg_time_filter = df.avg_time <= 500\n", + "df_2 = df[avg_time_filter]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "plt.hist(df_2.avg_time, bins = 30)\n", + "plt.show()\n", + "\n", + "#Answer is Right Skewed" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#q2\n", + "rating_filter = df['avg_rating'] >= 9\n", + "df_3 = df[rating_filter]\n", + "\n", + "#Answer is Yes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_3.avg_time.mean()\n", + "\n", + "#97.5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_2.avg_time.mean()\n", + "#82.23635625887604" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 2.ipynb b/21 Day Challenge/Day 2.ipynb new file mode 100644 index 0000000..b462f58 --- /dev/null +++ b/21 Day Challenge/Day 2.ipynb @@ -0,0 +1,79 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Dot has a few lists you can use as reference: their grocery list, the prices they used to pay in the city, and the prices for the rural grocer. What is the price difference between groceries in the city vs. groceries in the country, as a percentage of country prices?\n", + "\n", + "Note: The index position for each item is consistent across all three lists." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Grocery List (19 items)\n", + "grocery_list = ['Bananas', 'Clementines', 'Baguette', 'Oat Milk', 'Olive Oil', 'Coffee Beans',\n", + " 'Chocolate Bar', 'Brocolli', 'Eggplant', 'Chickpeas', 'Lentils', 'Tomatoes',\n", + " 'Pasta', 'Rice', 'Yogurt', 'Blueberries', 'Onions', 'Garlic', 'Truffles']\n", + "\n", + "# City Price\n", + "city_price = [6.49, 4.99, 4.39, 4.29, 11.99, 17.99, \n", + " 3.49, 3.99, 1.10, 1.99, 2.99, 4.68, \n", + " 1.59, 8.99, 3.49, 6.99, 2.99, 1.98, 14.99]\n", + "\n", + "# Country Price\n", + "country_price = [4.49, 4.12, 3.42, 6.99, 7.99, 14.99, \n", + " 2.99, 2.49, 0.99, 1.49, 2.49, 1.99, \n", + " 1.59, 6.99, 3.89, 4.99, 1.69, 1.87, 11.49]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.2582\n" + ] + } + ], + "source": [ + "t_c = sum(city_price)\n", + "t_ct = sum(country_price)\n", + "\n", + "diff = (t_c - t_ct)/ t_ct\n", + "\n", + "print(round(diff,4))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 20.ipynb b/21 Day Challenge/Day 20.ipynb new file mode 100644 index 0000000..2e95019 --- /dev/null +++ b/21 Day Challenge/Day 20.ipynb @@ -0,0 +1,108 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Play around with the scatterplot and test out different correlations between the numerical categories in the dataset. 
Then, help Dot by answering these questions:\n", + "\n", + "What kind of correlation is there between the weight and avg_rating?\n", + "\n", + "What is the correlation coefficient between the two columns?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('boardgames.csv')\n", + "df.head(3)\n", + "\n", + "#\trank\tbgg_url\tgame_id\tnames\tmin_players\tmax_players\tavg_time\tmin_time\tmax_time\tyear\tavg_rating\tgeek_rating\tnum_votes\timage_url\tage\tmechanic\towned\tcategory\tdesigner\tweight\n", + "#0\t1\thttps://boardgamegeek.com/boardgame/174430/glo...\t174430\tGloomhaven\t1\t4\t120\t60\t120\t2017\t8.98893\t8.61858\t15376\thttps://cf.geekdo-images.com/original/img/lDN3...\t12\tAction / Movement Programming, Co-operative Pl...\t25928\tAdventure, Exploration, Fantasy, Fighting, Min...\tIsaac Childres\t3.7543\n", + "#1\t2\thttps://boardgamegeek.com/boardgame/161936/pan...\t161936\tPandemic Legacy: Season 1\t2\t4\t60\t60\t60\t2015\t8.66140\t8.50163\t26063\thttps://cf.geekdo-images.com/original/img/P_Sw...\t13\tAction Point Allowance System, Co-operative Pl...\t41605\tEnvironmental, Medical\tRob Daviau, Matt Leacock\t2.8210\n", + "#2\t3\thttps://boardgamegeek.com/boardgame/182028/thr...\t182028\tThrough the Ages: A New Story of Civilization\t2\t4\t240\t180\t240\t2015\t8.60673\t8.30183\t12352\thttps://cf.geekdo-images.com/original/img/1d2h...\t14\tAction Point Allowance System, Auction/Bidding...\t15848\tCard Game, Civilization, Economic\tVlaada Chvátil\t4.3678" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution\n", + "plt.figure()\n", + "plt.scatter(df.weight, df.avg_rating)\n", + "plt.figure()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.corr()\n", + "\n", + "#rank\tgame_id\tmin_players\tmax_players\tavg_time\tmin_time\tmax_time\tyear\tavg_rating\tgeek_rating\tnum_votes\tage\towned\tweight\n", + "#rank\t1.000000\t-0.029686\t0.021409\t0.018707\t0.011602\t0.021634\t0.010972\t-0.007448\t-0.385495\t-0.886759\t-0.462693\t-0.143138\t-0.465226\t-0.164812\n", + "#game_id\t-0.029686\t1.000000\t-0.144871\t0.016250\t-0.064498\t-0.104995\t-0.065314\t0.103455\t0.301063\t0.055626\t-0.077227\t0.096223\t-0.039391\t-0.092998\n", + "#min_players\t0.021409\t-0.144871\t1.000000\t0.158905\t-0.049665\t-0.022629\t-0.049235\t0.007135\t-0.216454\t-0.040871\t0.020947\t-0.004194\t0.007918\t-0.197029\n", + "#max_players\t0.018707\t0.016250\t0.158905\t1.000000\t-0.022040\t-0.020897\t-0.022011\t0.006533\t-0.067922\t-0.022980\t0.006934\t-0.003500\t0.007676\t-0.136169\n", + "#avg_time\t0.011602\t-0.064498\t-0.049665\t-0.022040\t1.000000\t0.659931\t0.999926\t0.003830\t0.161758\t-0.011703\t-0.025939\t0.025333\t-0.024539\t0.233859\n", + "#min_time\t0.021634\t-0.104995\t-0.022629\t-0.020897\t0.659931\t1.000000\t0.659825\t0.003850\t0.116633\t-0.020554\t-0.029580\t0.027504\t-0.028970\t0.234898\n", + "#max_time\t0.010972\t-0.065314\t-0.049235\t-0.022011\t0.999926\t0.659825\t1.000000\t0.003735\t0.161214\t-0.011179\t-0.025618\t0.025193\t-0.024241\t0.233712\n", + "#year\t-0.007448\t0.103455\t0.007135\t0.006533\t0.003830\t0.003850\t0.003735\t1.000000\t0.020707\t0.008029\t-0.016072\t0.098132\t-0.013931\t0.003438\n", + "#avg_rating\t-0.385495\t0.301063\t-0.216454\t-0.067922\t0.161758\t0.116633\t0.161214\t0.020707\t1.000000\t0.456277\t0.165803\t0.156797\t0.165117\t0.547244\n", + "#geek_rating\t-0.886759\t0.055626\t-0.040871\t-0.022980\t-0.011703\t-0.020554\t-0.011179\t0.008029\t0.456277\t1.000000\t0.637970\t0.163270\t0.629538\t0.203591\n", + 
"#num_votes\t-0.462693\t-0.077227\t0.020947\t0.006934\t-0.025939\t-0.029580\t-0.025618\t-0.016072\t0.165803\t0.637970\t1.000000\t0.045512\t0.984790\t0.014468\n", + "#age\t-0.143138\t0.096223\t-0.004194\t-0.003500\t0.025333\t0.027504\t0.025193\t0.098132\t0.156797\t0.163270\t0.045512\t1.000000\t0.052791\t0.303283\n", + "#owned\t-0.465226\t-0.039391\t0.007918\t0.007676\t-0.024539\t-0.028970\t-0.024241\t-0.013931\t0.165117\t0.629538\t0.984790\t0.052791\t1.000000\t0.006033\n", + "#weight\t-0.164812\t-0.092998\t-0.197029\t-0.136169\t0.233859\t0.234898\t0.233712\t0.003438\t0.547244\t0.203591\t0.014468\t0.303283\t0.006033\t1.000000\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 0.5472" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 21.ipynb b/21 Day Challenge/Day 21.ipynb new file mode 100644 index 0000000..bc31f44 --- /dev/null +++ b/21 Day Challenge/Day 21.ipynb @@ -0,0 +1,290 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Dot wants to play retro video games with all their new friends! 
Help them figure out which games would be best.\n", + "\n", + "Questions:\n", + "\n", + "What is the correlation coefficient between Critic_Score and User_Score?\n", + "\n", + "Note: You may have to clean some of the columns and fill it with the median value (if numerical).\n", + "Plot the top 5 best selling games released before the year 2000.\n", + "\n", + "Note: Use Global_Sales\n", + "Create a new column called Aggregate_Score, which returns the proportional average between Critic Score and User_Score based on Critic_Count and User_Count. Plot a horizontal bar chart of the top 5 highest rated games by Aggregate_Score, not published by Nintendo before the year 2000. From this bar chart, what is the highest rated game by Aggregate_Score?\n", + "\n", + "Note: Critic_Count should be filled with the mean. User_Count should be filled with the median." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('video_games.csv')\n", + "df.head(2)\n", + "\n", + "#Name\tPlatform\tYear_of_Release\tGenre\tPublisher\tNA_Sales\tEU_Sales\tJP_Sales\tOther_Sales\tGlobal_Sales\tCritic_Score\tCritic_Count\tUser_Score\tUser_Count\tDeveloper\tRating\n", + "#0\tWii Sports\tWii\t2006.0\tSports\tNintendo\t41.36\t28.96\t3.77\t8.45\t82.53\t76.0\t51.0\t8.0\t322.0\tNintendo\tE\n", + "#1\tSuper Mario Bros.\tNES\t1985.0\tPlatform\tNintendo\t29.08\t3.58\t6.81\t0.77\t40.24\tNaN\tNaN\t7.5\tNaN\tNaN\tNaN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Solutions Q1\n", + "df.Critic_Score.isnull().sum()\n", + "#8582" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.Critic_Score.median()\n", + "#71.0" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Fill in with the median \n", + "df['Critic_Score'] = df['Critic_Score'].fillna(value = df.Critic_Score.median())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.scatter(df.User_Score, df.Critic_Score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#WHat is the correlation?\n", + "df['User_Score'].corr(df['Critic_Score'])\n", + "#0.47857313968321197" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution Q2\n", + "best_selling_2000_filter = df[\"Year_of_Release\"] < 2000\n", + "best_selling_2000 = df[best_selling_2000_filter]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_5 = best_selling_2000.sort_values('Global_Sales', ascending = False).head()\n", + "best_selling_2000.sort_values('Global_Sales', ascending = False).head()\n", + "\n", + "#Name\tPlatform\tYear_of_Release\tGenre\tPublisher\tNA_Sales\tEU_Sales\tJP_Sales\tOther_Sales\tGlobal_Sales\tCritic_Score\tCritic_Count\tUser_Score\tUser_Count\tDeveloper\tRating\n", + "#1\tSuper Mario Bros.\tNES\t1985.0\tPlatform\tNintendo\t29.08\t3.58\t6.81\t0.77\t40.24\t71.0\tNaN\t7.5\tNaN\tNaN\tNaN\n", + "#4\tPokemon Red/Pokemon Blue\tGB\t1996.0\tRole-Playing\tNintendo\t11.27\t8.89\t10.22\t1.00\t31.37\t71.0\tNaN\t7.5\tNaN\tNaN\tNaN\n", + "#5\tTetris\tGB\t1989.0\tPuzzle\tNintendo\t23.20\t2.26\t4.22\t0.58\t30.26\t71.0\tNaN\t7.5\tNaN\tNaN\tNaN\n", + "#9\tDuck Hunt\tNES\t1984.0\tShooter\tNintendo\t26.93\t0.63\t0.28\t0.47\t28.31\t71.0\tNaN\t7.5\tNaN\tNaN\tNaN\n", + "#12\tPokemon Gold/Pokemon Silver\tGB\t1999.0\tRole-Playing\tNintendo\t9.00\t6.18\t7.20\t0.71\t23.10\t71.0\tNaN\t7.5\tNaN\tNaN\tNaN" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize = (14,7))\n", + "plt.bar(x = top_5.Name, height = top_5.Global_Sales)\n", + "#plt.xticks(rotation = 'vertical')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Q3 Solution\n", + "#Step 1: Fill in missing values with the median\n", + "#Columns with missing values Critic_Count and User_Count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.Critic_Count.isnull().sum()\n", + "#8582" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Fill in with the mean\n", + "df['Critic_Count'] = df['Critic_Count'].fillna(value = df.Critic_Count.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Fill in with the median\n", + "df['User_Count'] = df['User_Count'].fillna(value = df.User_Count.median())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Up the User_score\n", + "df['User_Score'] = df['User_Score'] * 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Create aggregate Score\n", + "\n", + "df['Aggregate_Score'] = ((df['Critic_Score'] * df['Critic_Count']) + (df['User_Score'] * df['User_Count']))/(df['Critic_Count'] + df['User_Count'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Aggregate_Score\"].describe()\n", + "\n", + "#count 16719.000000\n", + "#mean 71.803709\n", + "#std 8.598452\n", + "#min 10.054054\n", + "#25% 72.503918\n", + "#50% 73.014029\n", + "#75% 73.165747\n", + "#max 94.193168\n", + "#Name: Aggregate_Score, dtype: float64" + ] + }, + { + "cell_type": "code", + "execution_count": null, 
+ "metadata": {}, + "outputs": [], + "source": [ + "nintendo_filter_year = df[\"Year_of_Release\"] < 2000\n", + "nintendo_filter_publisher = df[\"Publisher\"] != 'Nintendo'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nintendo = df[nintendo_filter_year]\n", + "nintendo = nintendo[nintendo_filter_publisher]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_5_nintendo = nintendo.sort_values('Aggregate_Score', ascending = False).head()\n", + "nintendo.sort_values('Aggregate_Score', ascending = False).head()\n", + "\n", + "#Name\tPlatform\tYear_of_Release\tGenre\tPublisher\tNA_Sales\tEU_Sales\tJP_Sales\tOther_Sales\tGlobal_Sales\tCritic_Score\tCritic_Count\tUser_Score\tUser_Count\tDeveloper\tRating\tAggregate_Score\n", + "#146\tMetal Gear Solid\tPS\t1998.0\tAction\tKonami Digital Entertainment\t3.18\t1.83\t0.78\t0.24\t6.03\t94.0\t20.0\t94.0\t918.0\tKCEJ\tM\t94.000000\n", + "#1546\tCastlevania: Symphony of the Night\tPS\t1997.0\tPlatform\tKonami Digital Entertainment\t0.58\t0.40\t0.21\t0.08\t1.27\t93.0\t12.0\t94.0\t358.0\tKonami\tT\t93.967568\n", + "#1712\tShenmue\tDC\t1999.0\tAdventure\tSega\t0.52\t0.24\t0.38\t0.04\t1.18\t88.0\t9.0\t94.0\t201.0\tSega AM2\tT\t93.742857\n", + "#5585\tHarvest Moon: Back to Nature\tPS\t1999.0\tSimulation\tUbisoft\t0.11\t0.07\t0.12\t0.02\t0.32\t82.0\t6.0\t93.0\t78.0\tVictor Interactive Software\tE\t92.214286\n", + "#65\tFinal Fantasy VII\tPS\t1997.0\tRole-Playing\tSony Computer Entertainment\t3.01\t2.47\t3.28\t0.96\t9.72\t92.0\t20.0\t92.0\t1282.0\tSquareSoft\tT\t92.000000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize = (14,7))\n", + "plt.barh(y = top_5_nintendo.Name, width = top_5_nintendo.Aggregate_Score)\n", + "#plt.xticks(rotation = 'vertical')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 3.ipynb b/21 Day Challenge/Day 3.ipynb new file mode 100644 index 0000000..386341c --- /dev/null +++ b/21 Day Challenge/Day 3.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Dot has some specific rules for what they want to change in the shopping list:\n", + "\n", + "They hate oak wood, and prefer maple.\n", + "They want to paint all the rooms blue except the kitchen, which they want to paint white.\n", + "old_blueprint = {\n", + " \"Kitchen\": ['Dirty', 'Oak', \"Damaged\", \"Green\"],\n", + " \"Dining Room\": ['Dirty', 'Pine', 'Good Condition', 'Grey'],\n", + " \"Living Room\": ['Dirty', 'Oak', 'Damaged', 'Red'],\n", + " \"Bedroom\" : [\"Clean\", 'Mahogany', 'Good Condition', 'Green'],\n", + " \"Bathroom\": [\"Dirty\", 'White Tile', 'Good Condition','White'],\n", + " \"Shed\" : ['Dirty', \"Cherry\", \"Damaged\", \"Un-painted\"]\n", + "}\n", + "\n", + "shopping_list = ['20 x Oak Plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'White Paint', 'White Paint']\n", + "Note: The blueprint above is in a dictionary format and we won't be needing to work with dictionaries in the challenge, use the blueprint as reference only.\n", + "\n", + "Use python's pop(), insert(), and append() list functions to change the shopping_list above so that it reflects the right materials needed.\n", + "\n", + "The list should be ordered by wood types first, then paint types.\n", + "\n", + "example_shopping_list = ['wood type in room A', 'wood type in room b','paint type in room a','paint type in room 
b']\n", + "Create a paint_list list from the new_shopping_list list using the built in python list indexing ability." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "shopping_list = ['20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'White Paint', 'White Paint']" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'White Paint', 'White Paint']\n" + ] + } + ], + "source": [ + "#Solution\n", + "new_shopping_list = shopping_list\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'White Paint']\n" + ] + } + ], + "source": [ + "new_shopping_list.pop(-1)\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint']\n" + ] + } + ], + "source": [ + "new_shopping_list.pop(-1)\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'Blue Paint']\n" + ] + } + ], + "source": [ + "new_shopping_list.append('Blue Paint')\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'Blue Paint', 'Blue 
Paint']\n" + ] + } + ], + "source": [ + "new_shopping_list.append('Blue Paint')\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint']\n" + ] + } + ], + "source": [ + "new_shopping_list.append('Blue Paint')\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint']\n" + ] + } + ], + "source": [ + "new_shopping_list.append('Blue Paint')\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint']\n" + ] + } + ], + "source": [ + "new_shopping_list.append('Blue Paint')\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Maple Plank', '20 x Oak plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint']\n" + ] + } + ], + "source": [ + "new_shopping_list.insert(0, \"20 x Maple Plank\")\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20 x Maple Plank', '20 x Maple Plank', '20 x Oak Plank', '20 x Cherry Plank', 'White Paint', 'Blue Paint', 
'Blue Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint']\n" + ] + } + ], + "source": [ + "new_shopping_list.pop(1)\n", + "new_shopping_list.insert(1, \"20 x Maple Plank\")\n", + "print(new_shopping_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "new_shopping_list = ['20 x Maple Plank', '20 x Maple Plank', '20 x Cherry Plank', 'White Paint', \n", + " 'Blue Paint','Blue Paint','Blue Paint','Blue Paint', 'Blue Paint']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['White Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint', 'Blue Paint']\n" + ] + } + ], + "source": [ + "# Solution\n", + "paint_list = new_shopping_list[3:]\n", + "print(paint_list)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 4.ipynb b/21 Day Challenge/Day 4.ipynb new file mode 100644 index 0000000..caf752f --- /dev/null +++ b/21 Day Challenge/Day 4.ipynb @@ -0,0 +1,93 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge:\n", + "Dot needs to purchase:\n", + "\n", + "600 planks of Oak Wood\n", + "150 liters of Blue Paint\n", + "15 liters of White Paint\n", + "165 liters of Paint Finish\n", + "Item\tNeeded Amount to Buy\tWholesale Price\tRetail Price\n", + "Plank of Oak Wood\t600\t$ 7000\t$ 12.99\n", + "1 Liter of Blue Paint\t150\t$ 1000\t$ 8.99\n", + "1 Liter of White Paint\t15\t$ 1000\t$ 9.99\n", + "1 Liter of Paint Finish\t165\t$ 800\t$ 3.99\n", + "Use a loop 
to determine the price Dot would pay for purchasing supplies at the retail price. Based on that calculation, which items should Dot buy at retail vs. wholesale?\n", + "\n", + "Note: Assume the wholesale price covers all the supply Dot needs for each item, whereas the retail price is per single unit." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "item_list = ['Oak Wood', 'Blue Paint', 'White Paint', 'Paint Finish']\n", + "\n", + "amount_list = [600,150,15,165]\n", + "\n", + "wholesale_price_list = [7000, 1000, 1000, 800]\n", + "\n", + "retail_price = [12.99, 8.99, 9.99, 3.99]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution\n", + "full_price = []\n", + "\n", + "for i in range(len(item_list)):\n", + " \n", + " total_price = amount_list[i] * retail_price[i]\n", + " full_price.append(total_price)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[7794.0, 1348.5, 149.85, 658.35]\n" + ] + } + ], + "source": [ + "print(full_price)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 5.ipynb b/21 Day Challenge/Day 5.ipynb new file mode 100644 index 0000000..93f9c15 --- /dev/null +++ b/21 Day Challenge/Day 5.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Dot's okay with paying a bit of a surplus for convenience, but they don't want to go broke buying
dustpans and glass cleaner. Help them figure out which items cost over 10% more at the nearby store, so they can avoid buying these items.\n", + "\n", + "Using Python, develop a list of items that are too expensive for Dot to purchase at the nearby store." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Cleaning Supplies List (19 items)\n", + "cleaningsupplies_list = ['Broom', 'Mop', 'Dustpan', 'Garbage Bags', 'Glass Cleaner', 'Vinegar',\n", + " 'Soap', 'Bleach', 'Duster', 'Floor Cleaner', 'Sponges', 'Dish Soap',\n", + " 'Drain Cleaner', 'Paper Towels', 'Cleaning Rags', 'Toilet Cleaner', \n", + " 'Rubber Gloves', 'Alcohol Wipes', 'Squeegee']\n", + "\n", + "# City Price\n", + "city_price = [6.49, 4.99, 3.39, 4.29, 3.99, 1.99, \n", + " 1.50, 3.99, 4.99, 5.99, 2.99, 2.99, \n", + " 5.99, 2.99, 3.49, 6.99, 2.99, 1.98, 11.99]\n", + "\n", + "# Country Price\n", + "country_price = [5.49, 4.69, 4.42, 5.99, 5.99, 2.50,\n", + " 1.25, 2.49, 4.50, 6.75, 2.49, 1.99, \n", + " 6.25, 3.99, 3.59, 4.99, 1.69, 1.87, 10.99]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Dustpan', 'Garbage Bags', 'Glass Cleaner', 'Vinegar', 'Floor Cleaner', 'Paper Towels']\n" + ] + } + ], + "source": [ + "#Solution\n", + "expensive_items = []\n", + "\n", + "for i in range(len(cleaningsupplies_list)):\n", + " \n", + " if ((country_price[i] - city_price[i])/city_price[i]) > 0.1:\n", + " expensive_items.append(cleaningsupplies_list[i])\n", + " \n", + "print(expensive_items)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 6.ipynb b/21 Day Challenge/Day 6.ipynb new file mode 100644 index 0000000..6e7cd7e --- /dev/null +++ b/21 Day Challenge/Day 6.ipynb @@ -0,0 +1,227 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge:\n", + "There are many holes in the living room's ceiling that desperately need to be fixed. Dot's measured them, and in total there are about 100. They need to figure out how much does it cost to fix all of the holes. Differently sized holes will require differently sized patches to fix them.\n", + "\n", + "Size of Hole\tCost to Fix\n", + "Small (less than 20 mm)\t$1.30\n", + "Medium (above or equal to 20 mm AND less than 70mm)\t$1.60\n", + "Large (above or equal to 70 mm)\t$2.10\n", + "Dot needs you to look at the measurements and figure out the answers to the following questions:\n", + "\n", + "What is the average sized hole?\n", + "What is the average cost to fix a hole?\n", + "What is the total cost of fixing all of the holes?\n", + "\n", + "Note: Use a for loop and an if else statement to answer Q3.\n", + "Stretch Question:\n", + "\n", + "Stretch Questions are not required to be completed to finish the challenge but are recommended to further develop your skills.\n", + "\n", + "What is the maximum sized hole?\n", + "What is the minimum sized hole?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[9, 3, 1, 21, 35]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import random \n", + "random.seed(34)\n", + "\n", + "hole_sizes = [random.randint(1, i) for i in range(1, 101)]\n", + "random.shuffle(hole_sizes)\n", + "\n", + "# hole sizes in mm\n", + "hole_sizes[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "28.39\n", + "28.39\n" + ] + } + ], + "source": [ + "#SOLUTION\n", + "#q1\n", + "print(sum(hole_sizes)/len(hole_sizes))\n", + "\n", + "#or\n", + "\n", + "#Pandas version\n", + "import pandas as pd\n", + "hs = pd.Series(hole_sizes)\n", + "print(hs.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "96" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " #STRETCH #q1\n", + "hs.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# STRETCH #q2\n", + "hs.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The amount of small holes is 44.\n", + "The amount of small holes is 52.\n", + "The amount of small holes is 4.\n" + ] + } + ], + "source": [ + "#Q2\n", + "count_small = 0\n", + "count_medium = 0\n", + "count_large = 0 \n", + "\n", + "for i in hole_sizes:\n", + " if i < 20:\n", + " count_small += 1\n", + " elif i >= 20 and i < 70:\n", + " count_medium += 1 \n", + " else:\n", + " count_large += 1\n", + " \n", 
+ "print(f'The amount of small holes is {count_small}.')\n", + "print(f'The amount of small holes is {count_medium}.')\n", + "print(f'The amount of small holes is {count_large}.')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.488\n" + ] + } + ], + "source": [ + "average_cost = ((count_small*1.3)+(count_medium*1.6)+(count_large*2.1))/ 100\n", + "print(round(average_cost,4))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "148.8\n" + ] + } + ], + "source": [ + "#Q3\n", + "total_cost = 0\n", + "\n", + "for i in hole_sizes:\n", + " if i < 20:\n", + " total_cost += 1.3\n", + " elif i >= 20 and i < 70:\n", + " total_cost += 1.6 \n", + " else:\n", + " total_cost += 2.1\n", + " \n", + "print(round(total_cost,4))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 7.ipynb b/21 Day Challenge/Day 7.ipynb new file mode 100644 index 0000000..8fdbd06 --- /dev/null +++ b/21 Day Challenge/Day 7.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Dot has a lot of different boxes laying around. They need a system for how to unpack them, or they'll just continue procrastinating. 
Help Dot sort the boxes by their weight.\n", + "\n", + "Box:\tWeight (kg)\n", + "Box 1\t4\n", + "Box 2\t2\n", + "Box 3\t18\n", + "Box 4\t21\n", + "Box 5\t14\n", + "Box 6\t13\n", + "Create a function that will open the boxes according to their weight, from lightest to heaviest." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#example dictionary\n", + "user_boxes = {'weight': [4,2,18,21,14,13],\n", + " 'box_name': ['box1','box2', 'box3', 'box4', 'box5', 'box6']\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#Solution:\n", + "def open_box_order(user_boxes):\n", + " \n", + " for i in range(len(user_boxes['weight'])):\n", + " \n", + " for j in range(len(user_boxes['weight']) - 1):\n", + " \n", + " if user_boxes['weight'][j] > user_boxes['weight'][j+1]:\n", + " \n", + " user_boxes['weight'][j], user_boxes['weight'][j+1] = user_boxes['weight'][j + 1],user_boxes['weight'][j]\n", + " user_boxes['box_name'][j], user_boxes['box_name'][j+1] = user_boxes['box_name'][j + 1],user_boxes['box_name'][j]\n", + " \n", + " return print(user_boxes['box_name'])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['box2', 'box1', 'box6', 'box5', 'box3', 'box4']\n" + ] + } + ], + "source": [ + "open_box_order(user_boxes)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'weight': [2, 4, 13, 14, 18, 21],\n", + " 'box_name': ['box2', 'box1', 'box6', 'box5', 'box3', 'box4']}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_boxes" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { +
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 8.ipynb b/21 Day Challenge/Day 8.ipynb new file mode 100644 index 0000000..f14fefc --- /dev/null +++ b/21 Day Challenge/Day 8.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pandas is one of the most widely used Python plugins. Pandas can be used when working with large datasets, or when performing data cleaning, manipulation, and anlaysis." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before we can use the pandas plugin, we have to import it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#import the pandas plugin\n", + "import pandas as pd # pd is the alias we have given to pandas." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#To Read a Dataset\n", + "#Milk.csv is stored into a Pandas DataFrame variable called df.\n", + "df = pd.read_csv('milk.csv')\n", + "\n", + "#df.head() function displays the first 5 rows of the dataset\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "DataFrame Functions\n", + "\n", + "df.describe() provides descriptive statistics of all numerical columns\n", + "df.nunique() counts the number of unique items in each column\n", + "df.shape gets the number of rows and columns in the dataframe (it is an attribute, so no parentheses)\n", + "DataFrame Column Functions\n", + "\n", + "info() provides an overview of all the columns, number of non-nulls, and data types in a DataFrame\n", + "max() gets the max value from a column\n", + "min() gets the min value from a column\n", + "mean() gets the mean value from a column\n", + "idxmax() gets the index label of the max value from a column\n", + "idxmin() gets the index label of the min value from a column\n", + "loc[] gets rows (or columns) with particular labels from the index\n", + "iloc[] gets rows (or columns) with particular positions in the index (only takes integers)\n", + "#example using info()\n", + "df.info()\n", + "\n", + "#example calling the max number from a column\n", + "df['column_name'].max()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "After playing around with the functions above, you can start helping Dot figure out when the best time to rent a cow might be.
With this dataset, you can take a look at how cows produce milk over time.\n", + "\n", + "Answer the following questions for Dot:\n", + "\n", + "At what year and month did company x produce the most milk?\n", + "At what year and month did company x produce the least milk?\n", + "Stretch\n", + "\n", + "Stretch questions are not required to be completed for the challenge, but you can test your skills with more advanced challenges.\n", + "\n", + "Which year produced the most milk?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solutions Q1\n", + "max_milk = df['Monthly milk production: pounds per cow'].max()\n", + "max_milk_index = df['Monthly milk production: pounds per cow'].idxmax()\n", + "\n", + "print(f'The maximum average milk production is {max_milk} during the year-month of {max_milk_index}.')\n", + "\n", + "#The maximum average milk production is 969 during the year-month of 148." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.loc[148]\n", + "\n", + "#Month 19-Jun\n", + "#Monthly milk production: pounds per cow 969\n", + "#Name: 148, dtype: object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Solutions Q2\n", + "min_milk = df['Monthly milk production: pounds per cow'].min()\n", + "min_milk_index = df['Monthly milk production: pounds per cow'].idxmin()\n", + "\n", + "print(f'The minimum average milk production is {min_milk} during the year-month of {min_milk_index}.')\n", + "\n", + "#The minimum average milk production is 553 during the year-month of 10." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.iloc[10]\n", + "\n", + "#Month 07-Dec\n", + "#Monthly milk production: pounds per cow 553\n", + "#Name: 10, dtype: object" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/21 Day Challenge/Day 9.ipynb b/21 Day Challenge/Day 9.ipynb new file mode 100644 index 0000000..d4ca847 --- /dev/null +++ b/21 Day Challenge/Day 9.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df = pd.read_csv('milk_2.csv')\n", + "\n", + "print(df.head(3)) #Inputing the value 3 inside the brackets of the df.head() function allows us to\n", + " #override the default value of 5.\n", + "print('\\n') # \n", + "\n", + "print(df.shape)\n", + "\n", + "#Month Monthly milk production: pounds per cow Number of Cows\n", + "#0 07-Feb 589.0 30.0\n", + "#1 07-Mar 561.0 32.0\n", + "#2 07-Apr " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.info()\n", + "\n", + "#\n", + "#RangeIndex: 168 entries, 0 to 167\n", + "#Data columns (total 3 columns):\n", + "# # Column Non-Null Count Dtype \n", + "#--- ------ -------------- ----- \n", + "# 0 Month 168 non-null object \n", + "# 1 Monthly milk production: pounds per cow 150 non-null float64\n", + "# 2 Number of Cows 151 non-null float64\n", + "#dtypes: float64(2), object(1)\n", + "#memory usage: 4.1+ KB" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "df.isnull().sum(axis = 0)\n", + "\n", + "#Month 0\n", + "#Monthly milk production: pounds per cow 18\n", + "#Number of Cows 17\n", + "#dtype: int64\n", + "#We can see from the functions we used that there are 18 rows missing in Monthly milk production: pounds per cow, and 17 rows missing from Number of Cows." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Challenge\n", + "Fill out the missing values in the monthly milk production column with the median, and fill out the number of cows column using the ffill method.\n", + "\n", + "After filling in the missing values with our new data, answer these questions for Dot, so they can figure out the value of having a cow year-round:\n", + "\n", + "What is the average for monthly milk production?\n", + "What is the standard deviation for monthly milk production?\n", + "What is the average number of cows used?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "median = df['Monthly milk production: pounds per cow'].median()\n", + "df['Monthly milk production: pounds per cow'] = df['Monthly milk production: pounds per cow'].fillna(value = median)\n", + "\n", + "df.isnull().sum(axis = 0)\n", + "\n", + "#Month 0\n", + "#Monthly milk production: pounds per cow 0\n", + "#Number of Cows 17\n", + "#dtype: int64" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df['Number of Cows'] = df['Number of Cows'].fillna(method = 'ffill')\n", + "df.isnull().sum(axis = 0)\n", + "\n", + "#Month 0\n", + "#Monthly milk production: pounds per cow 0\n", + "#Number of Cows 0\n", + "#dtype: int64" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#q1 \n", + "df['Monthly milk production: pounds per cow'].mean()\n", + "\n", + "#748.0535714285714" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#q2\n", + "df['Monthly milk production: pounds per cow'].std()\n", + "\n", + "#93.64783112594579" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#q3\n", + "df['Number of Cows'].mean()\n", + "\n", + "#49.898809523809526" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}