From f20679f183668bdbf25c424aee274c31c9b162dc Mon Sep 17 00:00:00 2001
From: Isaiah Akorita <akoritaisaiah@gmail.com>
Date: Wed, 13 Sep 2023 15:34:09 +0100
Subject: [PATCH 1/2] created normal sized cartogram

---
 .../07-visualizing-geospatial-data.ipynb      | 138 ++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 notebooks/07-visualizing-geospatial-data.ipynb

diff --git a/notebooks/07-visualizing-geospatial-data.ipynb b/notebooks/07-visualizing-geospatial-data.ipynb
new file mode 100644
index 0000000..e495704
--- /dev/null
+++ b/notebooks/07-visualizing-geospatial-data.ipynb
@@ -0,0 +1,138 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "77527494-9ecd-49d4-b5c1-f8c8250124ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from bokeh.io import output_notebook\n",
+    "\n",
+    "output_notebook()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cbef94c2-16a8-4f81-b32c-5484e697c866",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file = \"../data/csv_files/US_census.csv\"\n",
+    "\n",
+    "df = pd.read_csv(file)\n",
+    "\n",
+    "new_df = (\n",
+    "    df.groupby(\"state\")\n",
+    "    .agg({\"median_household_income\": \"mean\", \"density\": \"mean\"})\n",
+    "    .reset_index()\n",
+    ")\n",
+    "\n",
+    "new_df = new_df.rename(columns={\"median_household_income\": \"income\"})\n",
+    "\n",
+    "bins = [-1, 39_000, 50_000, 60_000, 70_000, 100_000]\n",
+    "labels = [\"<$40k\", \"$40k-$50k\", \"$50k-$60k\", \"$60k-$70k\", \">$70k\"]\n",
+    "colors = [\"#f1e0e3\", \"#d8bbc4\", \"#ba93ab\", \"#966B93\", \"#6B467A\"]\n",
+    "\n",
+    "new_df[\"income_range\"] = pd.cut(new_df[\"income\"], bins=bins, labels=labels, right=False)  # right=False: bins are closed on the left\n",
+    "new_df[\"colors\"] = new_df[\"income_range\"].cat.rename_categories(dict(zip(labels, colors)))  # same bins, hex color per range\n",
+    "\n",
+    "new_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fbe81a2c-ee20-49c8-8035-1111150853e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bokeh.sampledata.us_states import data\n",
+    "\n",
+    "us_df = pd.DataFrame(data).T\n",
+    "us_df = us_df[~us_df[\"name\"].isin([\"Alaska\", \"Hawaii\"])]\n",
+    "us_df[\"lons\"] = us_df.lons.values.tolist()\n",
+    "us_df[\"lats\"] = us_df.lats.values.tolist()\n",
+    "us_df = us_df.reset_index(drop=True)\n",
+    "us_df = us_df.rename(columns={\"name\": \"state\"})\n",
+    "us_df = us_df.drop(\"region\", axis=1)\n",
+    "\n",
+    "us_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ecbc44a5-7a59-4c43-8e86-872ad604cb5f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_df = pd.merge(new_df, us_df, on=\"state\")  # default inner join: keeps only states present in both frames\n",
+    "plot_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0efc3827-9ad7-4ff0-a771-6b77f9f215cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bokeh.models import ColumnDataSource\n",
+    "from bokeh.plotting import figure, show\n",
+    "\n",
+    "source = ColumnDataSource(data=plot_df)\n",
+    "\n",
+    "p = figure(width=900, height=600, tooltips=[(\"State\", \"@state\")])\n",
+    "\n",
+    "p.patches(\n",
+    "    \"lons\",\n",
+    "    \"lats\",\n",
+    "    fill_color=\"colors\",\n",
+    "    line_color=\"black\",\n",
+    "    legend_field=\"income_range\",\n",
+    "    source=source,\n",
+    ")\n",
+    "\n",
+    "p.xaxis.visible = False  # hide the longitude axis\n",
+    "p.yaxis.visible = False  # hide the latitude axis\n",
+    "p.legend.location = \"bottom_left\"  # legend entries come from legend_field on the patches glyph\n",
+    "p.grid.grid_line_color = None  # no grid lines behind the map\n",
+    "\n",
+    "show(p)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "783adb92-67fe-434f-af15-0438c6af6992",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 9f6f2e7d03fd6691c4b7e2b8438e570ce0726018 Mon Sep 17 00:00:00 2001
From: Isaiah Akorita <akoritaisaiah@gmail.com>
Date: Fri, 30 Aug 2024 17:50:34 +0100
Subject: [PATCH 2/2] refactored code

---
 .../07-visualizing-geospatial-data.ipynb      | 59 +++++++++++--------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/notebooks/07-visualizing-geospatial-data.ipynb b/notebooks/07-visualizing-geospatial-data.ipynb
index e495704..8e0816e 100644
--- a/notebooks/07-visualizing-geospatial-data.ipynb
+++ b/notebooks/07-visualizing-geospatial-data.ipynb
@@ -7,32 +7,35 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
     "from bokeh.io import output_notebook\n",
     "\n",
     "output_notebook()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "1d72683c-156a-45de-b49b-58928549e102",
+   "metadata": {},
+   "source": [
+    "## Data preparation"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "cbef94c2-16a8-4f81-b32c-5484e697c866",
+   "id": "2f54b495-5d47-433e-8d96-ae0241a02296",
    "metadata": {},
    "outputs": [],
    "source": [
-    "file = \"../data/csv_files/US_census.csv\"\n",
+    "import pandas as pd\n",
     "\n",
+    "file = \"../data/csv_files/US_census.csv\"\n",
     "df = pd.read_csv(file)\n",
     "\n",
-    "new_df = (\n",
-    "    df.groupby(\"state\")\n",
-    "    .agg({\"median_household_income\": \"mean\", \"density\": \"mean\"})\n",
-    "    .reset_index()\n",
+    "new_df = df.groupby(\"state\", as_index=False).agg(  # one row per state, mean of each metric\n",
+    "    income=pd.NamedAgg(\"median_household_income\", \"mean\"), density=pd.NamedAgg(\"density\", \"mean\")\n",
     ")\n",
     "\n",
-    "new_df = new_df.rename(columns={\"median_household_income\": \"income\"})\n",
-    "\n",
-    "bins = [-1, 39_000, 50_000, 60_000, 70_000, 100_000]\n",
+    "bins = [-1, 40_000, 50_000, 60_000, 70_000, float(\"inf\")]  # edges mirror the labels below; top bin is open-ended\n",
     "labels = [\"<$40k\", \"$40k-$50k\", \"$50k-$60k\", \"$60k-$70k\", \">$70k\"]\n",
     "colors = [\"#f1e0e3\", \"#d8bbc4\", \"#ba93ab\", \"#966B93\", \"#6B467A\"]\n",
@@ -52,13 +55,13 @@
    "source": [
     "from bokeh.sampledata.us_states import data\n",
     "\n",
-    "us_df = pd.DataFrame(data).T\n",
-    "us_df = us_df[~us_df[\"name\"].isin([\"Alaska\", \"Hawaii\"])]\n",
-    "us_df[\"lons\"] = us_df.lons.values.tolist()\n",
-    "us_df[\"lats\"] = us_df.lats.values.tolist()\n",
-    "us_df = us_df.reset_index(drop=True)\n",
-    "us_df = us_df.rename(columns={\"name\": \"state\"})\n",
-    "us_df = us_df.drop(\"region\", axis=1)\n",
+    "us_df = (\n",
+    "    pd.DataFrame(data)\n",
+    "    .T.rename(columns={\"name\": \"state\"})\n",
+    "    .loc[lambda states: ~states[\"state\"].isin([\"Alaska\", \"Hawaii\"])]  # drop the two non-contiguous states\n",
+    "    .drop(columns=\"region\")\n",
+    "    .reset_index(drop=True)\n",
+    ")\n",
     "\n",
     "us_df.head()"
    ]
@@ -74,6 +77,14 @@
     "plot_df.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "d733ec5e-74c9-48ee-b346-42fedf219ea1",
+   "metadata": {},
+   "source": [
+    "## Plotting"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -86,7 +97,11 @@
     "\n",
     "source = ColumnDataSource(data=plot_df)\n",
     "\n",
-    "p = figure(width=900, height=600, tooltips=[(\"State\", \"@state\")])\n",
+    "p = figure(\n",
+    "    tooltips=[(\"State\", \"@state\"), (\"Average income\", \"@income_range\")],  # @name reads a column of the data source\n",
+    "    height=600,\n",
+    "    width=900,\n",
+    ")\n",
     "\n",
     "p.patches(\n",
     "    \"lons\",\n",
@@ -104,14 +119,6 @@
     "\n",
     "show(p)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "783adb92-67fe-434f-af15-0438c6af6992",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {