diff --git a/notebooks/07-visualizing-geospatial-data.ipynb b/notebooks/07-visualizing-geospatial-data.ipynb
new file mode 100644
index 0000000..8e0816e
--- /dev/null
+++ b/notebooks/07-visualizing-geospatial-data.ipynb
@@ -0,0 +1,149 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "77527494-9ecd-49d4-b5c1-f8c8250124ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bokeh.io import output_notebook\n",
+    "\n",
+    "output_notebook()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1d72683c-156a-45de-b49b-58928549e102",
+   "metadata": {},
+   "source": [
+    "## Data preparation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2f54b495-5d47-433e-8d96-ae0241a02296",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "file = \"../data/csv_files/US_census.csv\"\n",
+    "df = pd.read_csv(file)\n",
+    "\n",
+    "new_df = df.groupby(\"state\", as_index=False).agg(\n",
+    "    income=(\"median_household_income\", \"mean\"), density=(\"density\", \"mean\")\n",
+    ")\n",
+    "\n",
+    "# Left-closed bins (right=False): each edge is the inclusive lower bound of its\n",
+    "# label; the last bin is open-ended so high incomes still get a label and color.\n",
+    "bins = [-1, 40_000, 50_000, 60_000, 70_000, float(\"inf\")]\n",
+    "labels = [\"<$40k\", \"$40k-$50k\", \"$50k-$60k\", \"$60k-$70k\", \">$70k\"]\n",
+    "colors = [\"#f1e0e3\", \"#d8bbc4\", \"#ba93ab\", \"#966B93\", \"#6B467A\"]\n",
+    "\n",
+    "new_df[\"income_range\"] = pd.cut(new_df[\"income\"], bins=bins, labels=labels, right=False)\n",
+    "new_df[\"colors\"] = pd.cut(new_df[\"income\"], bins=bins, labels=colors, right=False)\n",
+    "\n",
+    "new_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fbe81a2c-ee20-49c8-8035-1111150853e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bokeh.sampledata.us_states import data\n",
+    "\n",
+    "# Leave Alaska and Hawaii out of the state outlines that get plotted.\n",
+    "us_df = (\n",
+    "    pd.DataFrame(data)\n",
+    "    .T.loc[lambda states: ~states[\"name\"].isin([\"Alaska\", \"Hawaii\"])]\n",
+    "    .reset_index(drop=True)\n",
+    "    .rename(columns={\"name\": \"state\"})\n",
+    "    .drop(columns=\"region\")\n",
+    ")\n",
+    "\n",
+    "us_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ecbc44a5-7a59-4c43-8e86-872ad604cb5f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_df = new_df.merge(us_df, on=\"state\")\n",
+    "plot_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d733ec5e-74c9-48ee-b346-42fedf219ea1",
+   "metadata": {},
+   "source": [
+    "## Plotting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0efc3827-9ad7-4ff0-a771-6b77f9f215cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bokeh.models import ColumnDataSource\n",
+    "from bokeh.plotting import figure, show\n",
+    "\n",
+    "source = ColumnDataSource(data=plot_df)\n",
+    "\n",
+    "p = figure(\n",
+    "    width=900,\n",
+    "    height=600,\n",
+    "    tooltips=[(\"State\", \"@state\"), (\"Average income\", \"@income_range\")],\n",
+    ")\n",
+    "\n",
+    "p.patches(\n",
+    "    \"lons\",\n",
+    "    \"lats\",\n",
+    "    fill_color=\"colors\",\n",
+    "    line_color=\"black\",\n",
+    "    legend_field=\"income_range\",\n",
+    "    source=source,\n",
+    ")\n",
+    "\n",
+    "# Hide the axes and grid lines so only the state shapes and legend remain.\n",
+    "p.yaxis.visible = False\n",
+    "p.xaxis.visible = False\n",
+    "p.grid.grid_line_color = None\n",
+    "p.legend.location = \"bottom_left\"\n",
+    "\n",
+    "show(p)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}