From f20679f183668bdbf25c424aee274c31c9b162dc Mon Sep 17 00:00:00 2001 From: Isaiah Akorita Date: Wed, 13 Sep 2023 15:34:09 +0100 Subject: [PATCH 1/2] created normal sized cartogram --- .../07-visualizing-geospatial-data.ipynb | 138 ++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 notebooks/07-visualizing-geospatial-data.ipynb diff --git a/notebooks/07-visualizing-geospatial-data.ipynb b/notebooks/07-visualizing-geospatial-data.ipynb new file mode 100644 index 0000000..e495704 --- /dev/null +++ b/notebooks/07-visualizing-geospatial-data.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "77527494-9ecd-49d4-b5c1-f8c8250124ca", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from bokeh.io import output_notebook\n", + "\n", + "output_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbef94c2-16a8-4f81-b32c-5484e697c866", + "metadata": {}, + "outputs": [], + "source": [ + "file = \"../data/csv_files/US_census.csv\"\n", + "\n", + "df = pd.read_csv(file)\n", + "\n", + "new_df = (\n", + " df.groupby(\"state\")\n", + " .agg({\"median_household_income\": \"mean\", \"density\": \"mean\"})\n", + " .reset_index()\n", + ")\n", + "\n", + "new_df = new_df.rename(columns={\"median_household_income\": \"income\"})\n", + "\n", + "bins = [-1, 39_000, 50_000, 60_000, 70_000, 100_000]\n", + "labels = [\"<$40k\", \"$40k-$50k\", \"$50k-$60k\", \"$60k-$70k\", \">$70k\"]\n", + "colors = [\"#f1e0e3\", \"#d8bbc4\", \"#ba93ab\", \"#966B93\", \"#6B467A\"]\n", + "\n", + "new_df[\"income_range\"] = pd.cut(new_df[\"income\"], bins=bins, labels=labels, right=False)\n", + "new_df[\"colors\"] = pd.cut(new_df[\"income\"], bins=bins, labels=colors, right=False)\n", + "\n", + "new_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbe81a2c-ee20-49c8-8035-1111150853e5", + "metadata": {}, + "outputs": [], + "source": [ + "from bokeh.sampledata.us_states import data\n", + "\n", + "us_df = pd.DataFrame(data).T\n", + "us_df = us_df[~us_df[\"name\"].isin([\"Alaska\", \"Hawaii\"])]\n", + "us_df[\"lons\"] = us_df.lons.values.tolist()\n", + "us_df[\"lats\"] = us_df.lats.values.tolist()\n", + "us_df = us_df.reset_index(drop=True)\n", + "us_df = us_df.rename(columns={\"name\": \"state\"})\n", + "us_df = us_df.drop(\"region\", axis=1)\n", + "\n", + "us_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecbc44a5-7a59-4c43-8e86-872ad604cb5f", + "metadata": {}, + "outputs": [], + "source": [ + "plot_df = new_df.merge(us_df, on=\"state\")\n", + "plot_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0efc3827-9ad7-4ff0-a771-6b77f9f215cf", + "metadata": {}, + "outputs": [], + "source": [ + "from bokeh.models import ColumnDataSource\n", + "from bokeh.plotting import figure, show\n", + "\n", + "source = ColumnDataSource(data=plot_df)\n", + "\n", + "p = figure(width=900, height=600, tooltips=[(\"State\", \"@state\")])\n", + "\n", + "p.patches(\n", + " \"lons\",\n", + " \"lats\",\n", + " fill_color=\"colors\",\n", + " line_color=\"black\",\n", + " legend_field=\"income_range\",\n", + " source=source,\n", + ")\n", + "\n", + "p.yaxis.visible = False\n", + "p.xaxis.visible = False\n", + "p.grid.grid_line_color = None\n", + "p.legend.location = \"bottom_left\"\n", + "\n", + "show(p)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "783adb92-67fe-434f-af15-0438c6af6992", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 9f6f2e7d03fd6691c4b7e2b8438e570ce0726018 Mon Sep 17 00:00:00 2001 From: Isaiah Akorita Date: Fri, 30 Aug 2024 17:50:34 +0100 Subject: [PATCH 2/2] refactored code --- .../07-visualizing-geospatial-data.ipynb | 59 +++++++++++-------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/notebooks/07-visualizing-geospatial-data.ipynb b/notebooks/07-visualizing-geospatial-data.ipynb index e495704..8e0816e 100644 --- a/notebooks/07-visualizing-geospatial-data.ipynb +++ b/notebooks/07-visualizing-geospatial-data.ipynb @@ -7,32 +7,35 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import numpy as np\n", "from bokeh.io import output_notebook\n", "\n", "output_notebook()" ] }, + { + "cell_type": "markdown", + "id": "1d72683c-156a-45de-b49b-58928549e102", + "metadata": {}, + "source": [ + "## Data preparation" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "cbef94c2-16a8-4f81-b32c-5484e697c866", + "id": "2f54b495-5d47-433e-8d96-ae0241a02296", "metadata": {}, "outputs": [], "source": [ - "file = \"../data/csv_files/US_census.csv\"\n", + "import pandas as pd\n", "\n", + "file = \"../data/csv_files/US_census.csv\"\n", "df = pd.read_csv(file)\n", "\n", - "new_df = (\n", - " df.groupby(\"state\")\n", - " .agg({\"median_household_income\": \"mean\", \"density\": \"mean\"})\n", - " .reset_index()\n", + "new_df = df.groupby(\"state\", as_index=False).agg(\n", + " income=(\"median_household_income\", \"mean\"), density=(\"density\", \"mean\")\n", ")\n", "\n", - "new_df = new_df.rename(columns={\"median_household_income\": \"income\"})\n", - "\n", "bins = [-1, 39_000, 50_000, 60_000, 70_000, 100_000]\n", "labels = [\"<$40k\", \"$40k-$50k\", \"$50k-$60k\", \"$60k-$70k\", \">$70k\"]\n", "colors = [\"#f1e0e3\", \"#d8bbc4\", \"#ba93ab\", \"#966B93\", \"#6B467A\"]\n", @@ -52,13 +55,13 @@ "source": [ "from bokeh.sampledata.us_states import data\n", "\n", - "us_df = pd.DataFrame(data).T\n", - "us_df = us_df[~us_df[\"name\"].isin([\"Alaska\", \"Hawaii\"])]\n", - "us_df[\"lons\"] = us_df.lons.values.tolist()\n", - "us_df[\"lats\"] = us_df.lats.values.tolist()\n", - "us_df = us_df.reset_index(drop=True)\n", - "us_df = us_df.rename(columns={\"name\": \"state\"})\n", - "us_df = us_df.drop(\"region\", axis=1)\n", + "us_df = (\n", + " pd.DataFrame(data)\n", + " .T.loc[lambda df: ~df[\"name\"].isin([\"Alaska\", \"Hawaii\"])]\n", + " .reset_index(drop=True)\n", + " .rename(columns={\"name\": \"state\"})\n", + " .drop(columns=\"region\")\n", + ")\n", "\n", "us_df.head()" ] @@ -74,6 +77,14 @@ "plot_df.head()" ] }, + { + "cell_type": "markdown", + "id": "d733ec5e-74c9-48ee-b346-42fedf219ea1", + "metadata": {}, + "source": [ + "## Plotting" + ] + }, { "cell_type": "code", "execution_count": null, @@ -86,7 +97,11 @@ "\n", "source = ColumnDataSource(data=plot_df)\n", "\n", - "p = figure(width=900, height=600, tooltips=[(\"State\", \"@state\")])\n", + "p = figure(\n", + " width=900,\n", + " height=600,\n", + " tooltips=[(\"State\", \"@state\"), (\"Average income\", \"@income_range\")],\n", + ")\n", "\n", "p.patches(\n", " \"lons\",\n", @@ -104,14 +119,6 @@ "\n", "show(p)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "783adb92-67fe-434f-af15-0438c6af6992", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {