From 30f8d20abbe39f74e29fee7b1dcb50fd52676171 Mon Sep 17 00:00:00 2001 From: Matthew Date: Tue, 13 Nov 2018 18:29:23 -0500 Subject: [PATCH] Created using Colaboratory --- ...22_Choose_appropriate_visualizations.ipynb | 1644 ++++++++++------- 1 file changed, 993 insertions(+), 651 deletions(-) diff --git a/module2-choose-appropriate-visualizations/LS_DS_122_Choose_appropriate_visualizations.ipynb b/module2-choose-appropriate-visualizations/LS_DS_122_Choose_appropriate_visualizations.ipynb index 964e477..eee8bf7 100644 --- a/module2-choose-appropriate-visualizations/LS_DS_122_Choose_appropriate_visualizations.ipynb +++ b/module2-choose-appropriate-visualizations/LS_DS_122_Choose_appropriate_visualizations.ipynb @@ -1,652 +1,994 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "_Lambda School Data Science_\n", - "# Choose appropriate visualizations" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Upgrade Seaborn\n", - "\n", - "Make sure you have at least version 0.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install --upgrade seaborn" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import seaborn as sns\n", - "sns.__version__" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Fix misleading visualizations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!wget https://raw.githubusercontent.com/LambdaSchool/DS-Sprint-02-Storytelling-With-Data/master/module2-choose-appropriate-visualizations/misleading.py\n", - " \n", - "import misleading" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Fix misleading plot #1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "misleading.plot1()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Fix misleading plot #2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "misleading.plot2()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Fix misleading plot #3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "misleading.plot3()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Fix misleading plot #4" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "_If you're on Jupyter (not Colab) then uncomment and run this cell below:_" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import altair as alt\n", - "# alt.renderers.enable('notebook')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "misleading.plot4()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Links\n", - "- [How to Spot Visualization Lies](https://flowingdata.com/2017/02/09/how-to-spot-visualization-lies/)\n", - "- [Where to Start and End Your Y-Axis Scale](http://stephanieevergreen.com/y-axis/)\n", - "- [xkcd heatmap](https://xkcd.com/1138/)\n", - "- [Surprise Maps: Showing the Unexpected](https://medium.com/@uwdata/surprise-maps-showing-the-unexpected-e92b67398865)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Use Seaborn to visualize distributions and relationships with continuous and discrete variables\n", - "\n", - "#### Links\n", - "- [Seaborn tutorial](https://seaborn.pydata.org/tutorial.html)\n", - "- [Seaborn example gallery](https://seaborn.pydata.org/examples/index.html)\n", - "- [Chart Chooser](https://extremepresentation.typepad.com/files/choosing-a-good-chart-09.pdf)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Anscombe dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Load dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = sns.load_dataset('anscombe')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### See the data's shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### See the data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### [Group by](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.groupby.html) `'dataset'`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### [Describe](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.describe.html) the groups" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the [count](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.count.html), for each column in each group" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the [mean](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.mean.html) ..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the [standard deviation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.std.html) ..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the [correlation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.corr.html) ..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use pandas to [plot](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.plot.html) the groups, as scatter plots" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use Seaborn to make [relational plots](http://seaborn.pydata.org/generated/seaborn.relplot.html)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Use Seaborn to make [linear model plots](http://seaborn.pydata.org/generated/seaborn.lmplot.html)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Links\n", - "- [Seaborn examples: Anscombe's quartet](http://seaborn.pydata.org/examples/anscombes_quartet.html)\n", - "- [Wikipedia: Anscombe's quartet](https://en.wikipedia.org/wiki/Anscombe%27s_quartet)\n", - "- [The Datasaurus Dozen](https://www.autodeskresearch.com/publications/samestats)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Tips dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Load dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tips = sns.load_dataset('tips')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### See the data's shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### See the first 5 rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe the data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Make univariate [distribution plots](https://seaborn.pydata.org/generated/seaborn.distplot.html)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Make bivariate [relational plots](https://seaborn.pydata.org/generated/seaborn.relplot.html)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Make univariate [categorical plots](https://seaborn.pydata.org/generated/seaborn.catplot.html)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Make bivariate [categorical plots](https://seaborn.pydata.org/generated/seaborn.catplot.html)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Flights" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Load dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "flights = sns.load_dataset('flights')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### See the data's shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### See the first 5 rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Describe the data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Plot year & passengers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Plot month & passengers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a [pivot table](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.pivot_table.html) of passengers by month and year" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Plot the pivot table as a [heat map](https://seaborn.pydata.org/generated/seaborn.heatmap.html)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "LS_DS_122_Choose_appropriate_visualizations.ipynb", + "version": "0.3.2", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "cells": [ + { + "metadata": { + "id": "UY3UbFj4ZzH_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "_Lambda School Data Science_\n", + "# Choose appropriate visualizations" + ] + }, + { + "metadata": { + "id": "_hMLw4M3ZzIH", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Upgrade Seaborn\n", + "\n", + "Make sure you have at least version 0.9.0" + ] + }, + { + "metadata": { + "id": "QAA7c9DxZzIL", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# This is a comment\n", + "!pip install --upgrade seaborn" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Gwgrl7jcZzIY", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import seaborn as sns\n", + "sns.__version__" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "3rnsqMBLZzIi", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Fix misleading visualizations" + ] + }, + { + "metadata": { + "id": "aJ7XlZYAZzIm", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "ay--1xa-ZzIv", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "!wget https://raw.githubusercontent.com/LambdaSchool/DS-Sprint-02-Storytelling-With-Data/master/module2-choose-appropriate-visualizations/misleading.py\n", + " \n", + "import misleading" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "J6EwJoGQZzI3", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Fix misleading plot #1" + ] + }, + { + "metadata": { + "id": "bhcF5BpRZzI6", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "misleading.plot1()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "u85ewwDGZzJF", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "eHwjBtxhZzJO", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Fix misleading plot #2" + ] + }, + { + "metadata": { + "id": "LF93FVpDZzJR", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "misleading.plot2()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "4SU32Cj3ZzJc", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Jtv5n4anZzJl", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Fix misleading plot #3" + ] + }, + { + "metadata": { + "id": "TnuqUSAgZzJn", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "misleading.plot3()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "cSBua7KsZzJt", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "e9yhiGC_ZzJz", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Fix misleading plot #4" + ] + }, + { + "metadata": { + "id": "hX7godplZzJ2", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "_If you're on Jupyter (not Colab) then uncomment and run this cell below:_" + ] + }, + { + "metadata": { + "id": "yoLtyt-KZzJ5", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# import altair as alt\n", + "# alt.renderers.enable('notebook')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "RqcYNQzlZzKA", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "misleading.plot4()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "t3QfswQZZzKH", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "S1SaWh2ZZzKM", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Links\n", + "- [How to Spot Visualization Lies](https://flowingdata.com/2017/02/09/how-to-spot-visualization-lies/)\n", + "- [Where to Start and End Your Y-Axis Scale](http://stephanieevergreen.com/y-axis/)\n", + "- [xkcd heatmap](https://xkcd.com/1138/)\n", + "- [Surprise Maps: Showing the Unexpected](https://medium.com/@uwdata/surprise-maps-showing-the-unexpected-e92b67398865)" + ] + }, + { + "metadata": { + "id": "Z6MbJGIRZzKN", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Use Seaborn to visualize distributions and relationships with continuous and discrete variables\n", + "\n", + "#### Links\n", + "- [Seaborn tutorial](https://seaborn.pydata.org/tutorial.html)\n", + "- [Seaborn example gallery](https://seaborn.pydata.org/examples/index.html)\n", + "- [Chart Chooser](https://extremepresentation.typepad.com/files/choosing-a-good-chart-09.pdf)" + ] + }, + { + "metadata": { + "id": "6ORN5aujZzKP", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## 1. Anscombe dataset" + ] + }, + { + "metadata": { + "id": "vwPZGAufZzKU", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Load dataset" + ] + }, + { + "metadata": { + "id": "tdgg0a_3ZzKX", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "df = sns.load_dataset('anscombe')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "s64pMpgvZzKe", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### See the data's shape" + ] + }, + { + "metadata": { + "id": "0ODHpZnVZzKg", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "f9n2LcpBZzKl", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### See the data" + ] + }, + { + "metadata": { + "id": "E_NUtfCZZzKo", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "IwhIfw97ZzKw", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### [Group by](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.groupby.html) `'dataset'`" + ] + }, + { + "metadata": { + "id": "Edv_8iMoZzKy", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "j411Z1WDZzK9", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### [Describe](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.describe.html) the groups" + ] + }, + { + "metadata": { + "id": "YOsv1VQDZzK_", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "ij06orZuZzLD", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Get the [count](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.count.html), for each column in each group" + ] + }, + { + "metadata": { + "id": "90iA_c07ZzLE", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "DBdXYe4rZzLM", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Get the [mean](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.mean.html) ..." + ] + }, + { + "metadata": { + "id": "SyiTp9N-ZzLN", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "x6C2qnNcZzLX", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Get the [standard deviation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.std.html) ..." + ] + }, + { + "metadata": { + "id": "_6bYoihnZzLa", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "O3iJPQvLZzLj", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Get the [correlation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.corr.html) ..." + ] + }, + { + "metadata": { + "id": "hC8Oxk0IZzLk", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "yJ02LP8KZzLp", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Use pandas to [plot](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.plot.html) the groups, as scatter plots" + ] + }, + { + "metadata": { + "id": "UyW1yPkoZzLr", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "6K-m-SewZzLy", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Use Seaborn to make [relational plots](http://seaborn.pydata.org/generated/seaborn.relplot.html)" + ] + }, + { + "metadata": { + "id": "-7vUia46ZzL0", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "drU5Rhz0ZzL3", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Use Seaborn to make [linear model plots](http://seaborn.pydata.org/generated/seaborn.lmplot.html)" + ] + }, + { + "metadata": { + "id": "hVRUYDBUZzL4", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "yAsmUjVXZzL8", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Links\n", + "- [Seaborn examples: Anscombe's quartet](http://seaborn.pydata.org/examples/anscombes_quartet.html)\n", + "- [Wikipedia: Anscombe's quartet](https://en.wikipedia.org/wiki/Anscombe%27s_quartet)\n", + "- [The Datasaurus Dozen](https://www.autodeskresearch.com/publications/samestats)" + ] + }, + { + "metadata": { + "id": "4ub8opInZzL9", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## 2. Tips dataset" + ] + }, + { + "metadata": { + "id": "0G6-emmMZzL_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Load dataset" + ] + }, + { + "metadata": { + "id": "BmP16jt_ZzMA", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "tips = sns.load_dataset('tips')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "37CARn6NZzMD", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### See the data's shape" + ] + }, + { + "metadata": { + "id": "dUl2UrLbZzME", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "jebZ2O6sZzMJ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### See the first 5 rows" + ] + }, + { + "metadata": { + "id": "88RZK7jAZzMJ", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "iBOANvxbZzMO", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Describe the data" + ] + }, + { + "metadata": { + "id": "K63vHYukZzMP", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "56QC_mBqZzMW", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Make univariate [distribution plots](https://seaborn.pydata.org/generated/seaborn.distplot.html)" + ] + }, + { + "metadata": { + "id": "Ti7T0uumZzMX", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "DF2eCvhYZzMb", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Make bivariate [relational plots](https://seaborn.pydata.org/generated/seaborn.relplot.html)" + ] + }, + { + "metadata": { + "id": "rk3nR2LkZzMc", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "vLjksRy2ZzMl", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Make univariate [categorical plots](https://seaborn.pydata.org/generated/seaborn.catplot.html)" + ] + }, + { + "metadata": { + "id": "8mhmG809ZzMn", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "A_9GpQotZzMt", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Make bivariate [categorical plots](https://seaborn.pydata.org/generated/seaborn.catplot.html)" + ] + }, + { + "metadata": { + "id": "DUBKmP4qZzMv", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "yOhzAM6YZzMy", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## 3. Flights" + ] + }, + { + "metadata": { + "id": "iuZ4AYxfZzM0", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Load dataset" + ] + }, + { + "metadata": { + "id": "u1qUjbupZzM0", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "flights = sns.load_dataset('flights')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "_u1Od3ZaZzM4", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### See the data's shape" + ] + }, + { + "metadata": { + "id": "7K4BrKf4ZzM4", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "pIFlF--1ZzM8", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### See the first 5 rows" + ] + }, + { + "metadata": { + "id": "ZfyuLck_ZzM9", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "iy1RZ4rtZzNA", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Describe the data" + ] + }, + { + "metadata": { + "id": "lb3GZQM9ZzNB", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "UVBP9u6dZzND", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Plot year & passengers" + ] + }, + { + "metadata": { + "id": "aIGXUwP1ZzNE", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "KKq0FJppZzNH", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Plot month & passengers" + ] + }, + { + "metadata": { + "id": "ol9a51CxZzNI", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "vVi4__xvZzNQ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Create a [pivot table](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.pivot_table.html) of passengers by month and year" + ] + }, + { + "metadata": { + "id": "Y32FaHb0ZzNR", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "bJCCoqX5ZzNV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Plot the pivot table as a [heat map](https://seaborn.pydata.org/generated/seaborn.heatmap.html)" + ] + }, + { + "metadata": { + "id": "1LBKs6SIZzNV", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file