From 565dd2309252d48aa856659e127def852c3e4238 Mon Sep 17 00:00:00 2001 From: "Navid C. Constantinou" Date: Sat, 31 Aug 2024 10:50:19 +1000 Subject: [PATCH] apply review suggestions --- Recipes/Nearest_Neighbour_Distance.ipynb | 1704 ++++++++++++++++++---- 1 file changed, 1415 insertions(+), 289 deletions(-) diff --git a/Recipes/Nearest_Neighbour_Distance.ipynb b/Recipes/Nearest_Neighbour_Distance.ipynb index 647544aa..572bbe68 100644 --- a/Recipes/Nearest_Neighbour_Distance.ipynb +++ b/Recipes/Nearest_Neighbour_Distance.ipynb @@ -32,11 +32,11 @@ "outputs": [], "source": [ "import intake\n", - "catalog = intake.cat.access_nri\n", - "import pandas as pd\n", + "\n", "import xarray as xr\n", "import numpy as np\n", "import datetime as dt\n", + "\n", "from sklearn.neighbors import BallTree\n", "import matplotlib.pyplot as plt" ] @@ -64,13 +64,13 @@ "
\n", "
\n", "

Client

\n", - "

Client-b9f8614b-8805-11ee-a8b2-00000188fe80

\n", + "

Client-b18081f6-6732-11ef-9685-0000018ffe80

\n", " \n", "\n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "\n", @@ -93,20 +93,57 @@ "\n", " \n", "
\n", - "

Scheduler Info

\n", + "

Cluster Info

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "

LocalCluster

\n", + "

088c4d50

\n", + "
Connection method: DirectConnection method: Cluster objectCluster type: distributed.LocalCluster
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + "
\n", + " Dashboard: /proxy/8787/status\n", + " \n", + " Workers: 8\n", + "
\n", + " Total threads: 48\n", + " \n", + " Total memory: 188.56 GiB\n", + "
Status: runningUsing processes: True
\n", + "\n", + "
\n", + " \n", + "

Scheduler Info

\n", + "
\n", + "\n", "
\n", "
\n", "
\n", "
\n", "

Scheduler

\n", - "

Scheduler-7c846815-9f3c-4666-a6ac-f4150b5ef1f2

\n", + "

Scheduler-86643f04-3935-4853-843e-f153c59b926b

\n", " \n", " \n", " \n", " \n", " \n", " \n", @@ -114,174 +151,1280 @@ " Dashboard:/proxy/8787/status\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", - " Comm: tcp://10.6.48.33:8786\n", + " Comm: tcp://127.0.0.1:45827\n", " \n", - " Workers: 1\n", + " Workers: 8\n", "
\n", - " Total threads: 12\n", + " Total threads: 48\n", "
\n", - " Started: 37 minutes ago\n", + " Started: Just now\n", " \n", - " Total memory: 46.00 GiB\n", + " Total memory: 188.56 GiB\n", "
\n", - "
\n", - "
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "

Workers

\n", + "
\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 0

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:34301\n", + " \n", + " Total threads: 6\n", + "
\n", + " Dashboard: /proxy/38597/status\n", + " \n", + " Memory: 23.57 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:46039\n", + "
\n", + " Local directory: /jobfs/123969626.gadi-pbs/dask-scratch-space/worker-irj2wqfd\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 1

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:33457\n", + " \n", + " Total threads: 6\n", + "
\n", + " Dashboard: /proxy/35013/status\n", + " \n", + " Memory: 23.57 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:40171\n", + "
\n", + " Local directory: /jobfs/123969626.gadi-pbs/dask-scratch-space/worker-xbghgbmy\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 2

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:36755\n", + " \n", + " Total threads: 6\n", + "
\n", + " Dashboard: /proxy/34885/status\n", + " \n", + " Memory: 23.57 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:35943\n", + "
\n", + " Local directory: /jobfs/123969626.gadi-pbs/dask-scratch-space/worker-0eacf3et\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 3

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:34425\n", + " \n", + " Total threads: 6\n", + "
\n", + " Dashboard: /proxy/41651/status\n", + " \n", + " Memory: 23.57 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:39671\n", + "
\n", + " Local directory: /jobfs/123969626.gadi-pbs/dask-scratch-space/worker-jp_mftlo\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 4

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:38547\n", + " \n", + " Total threads: 6\n", + "
\n", + " Dashboard: /proxy/35225/status\n", + " \n", + " Memory: 23.57 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:38939\n", + "
\n", + " Local directory: /jobfs/123969626.gadi-pbs/dask-scratch-space/worker-4w4x9avg\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 5

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:43999\n", + " \n", + " Total threads: 6\n", + "
\n", + " Dashboard: /proxy/45311/status\n", + " \n", + " Memory: 23.57 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:37339\n", + "
\n", + " Local directory: /jobfs/123969626.gadi-pbs/dask-scratch-space/worker-_3jmgkk5\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 6

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:40331\n", + " \n", + " Total threads: 6\n", + "
\n", + " Dashboard: /proxy/34639/status\n", + " \n", + " Memory: 23.57 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:37009\n", + "
\n", + " Local directory: /jobfs/123969626.gadi-pbs/dask-scratch-space/worker-42j0sofv\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 7

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:34523\n", + " \n", + " Total threads: 6\n", + "
\n", + " Dashboard: /proxy/40891/status\n", + " \n", + " Memory: 23.57 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:43513\n", + "
\n", + " Local directory: /jobfs/123969626.gadi-pbs/dask-scratch-space/worker-ttwwfcbl\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dask.distributed import Client\n", + "client = Client()\n", + "client" + ] + }, + { + "cell_type": "markdown", + "id": "c45e1335-cd0b-4773-9e8f-4df654d38aa8", + "metadata": {}, + "source": [ + "## Accessing ACCESS-OM2-01 data\n", + "\n", + "First we load the ACCESS-NRI default intake catalog." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "06213890-26f2-4f14-871f-523078d6c9a5", + "metadata": {}, + "outputs": [], + "source": [ + "catalog = intake.cat.access_nri" + ] + }, + { + "cell_type": "markdown", + "id": "50cbd6c4-b6ed-437a-8b82-37751cd66e6b", + "metadata": {}, + "source": [ + "We use monthly sea ice outputs. Here, we load only two months of sea ice data." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b1de38e7-862a-4974-9a64-0b1046391571", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'aice_m' (time: 3, nj: 2700, ni: 3600)> Size: 117MB\n",
+       "dask.array<getitem, shape=(3, 2700, 3600), dtype=float32, chunksize=(1, 270, 360), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 24B 1978-01-01 1978-02-01 1978-03-01\n",
+       "    TLON     (nj, ni) float32 39MB dask.array<chunksize=(270, 360), meta=np.ndarray>\n",
+       "    TLAT     (nj, ni) float32 39MB dask.array<chunksize=(270, 360), meta=np.ndarray>\n",
+       "    ULON     (nj, ni) float32 39MB dask.array<chunksize=(270, 360), meta=np.ndarray>\n",
+       "    ULAT     (nj, ni) float32 39MB dask.array<chunksize=(270, 360), meta=np.ndarray>\n",
+       "Dimensions without coordinates: nj, ni\n",
+       "Attributes:\n",
+       "    units:          1\n",
+       "    long_name:      ice area  (aggregate)\n",
+       "    cell_measures:  area: tarea\n",
+       "    cell_methods:   time: mean\n",
+       "    time_rep:       averaged
" ], "text/plain": [ - "" + " Size: 117MB\n", + "dask.array\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 24B 1978-01-01 1978-02-01 1978-03-01\n", + " TLON (nj, ni) float32 39MB dask.array\n", + " TLAT (nj, ni) float32 39MB dask.array\n", + " ULON (nj, ni) float32 39MB dask.array\n", + " ULAT (nj, ni) float32 39MB dask.array\n", + "Dimensions without coordinates: nj, ni\n", + "Attributes:\n", + " units: 1\n", + " long_name: ice area (aggregate)\n", + " cell_measures: area: tarea\n", + " cell_methods: time: mean\n", + " time_rep: averaged" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "from dask.distributed import Client\n", - "client = Client(\"tcp://10.6.48.33:8786\")\n", - "client" - ] - }, - { - "cell_type": "markdown", - "id": "c45e1335-cd0b-4773-9e8f-4df654d38aa8", - "metadata": {}, - "source": [ - "## Accessing ACCESS-OM2-01 data\n", - "We will use monthly sea ice outputs from cycle four. For this example, we will load only two months of sea ice data. Note the use of `decode_coords = False`. This avoids lengthly delays in accessing sea ice data. See more information [here](https://forum.access-hive.org.au/t/issues-loading-access-om2-01-data-from-cycle-4/418/3)." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b1de38e7-862a-4974-9a64-0b1046391571", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/g/data/hh5/public/apps/miniconda3/envs/analysis3-23.07/lib/python3.10/site-packages/intake_esm/cat.py:270: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", - " .applymap(type)\n", - "/g/data/hh5/public/apps/miniconda3/envs/analysis3-23.07/lib/python3.10/site-packages/intake_esm/cat.py:270: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", - " .applymap(type)\n", - "/g/data/hh5/public/apps/miniconda3/envs/analysis3-23.07/lib/python3.10/site-packages/intake_esm/cat.py:270: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", - " .applymap(type)\n" - ] - } - ], "source": [ "cat_subset = catalog['01deg_jra55v140_iaf_cycle4']\n", - "var_search = cat_subset.search(variable='aice_m')\n", - "darray = var_search.to_dask()\n", - "darray = darray['aice_m']\n", - "darray = darray.sel(time=slice('1978-01', '1978-03'))\n", - "var_ice = darray" + "ds = cat_subset.search(variable='aice_m',\n", + ").to_dask(\n", + " xarray_combine_by_coords_kwargs=dict(compat=\"override\",\n", + " data_vars=\"minimal\",\n", + " coords=\"minimal\")\n", + ")\n", + "\n", + "var_ice = ds['aice_m']\n", + "var_ice = var_ice.sel(time=slice('1978-01', '1978-03'))\n", + "var_ice" ] }, { @@ -298,26 +1441,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "5f3c2530-9225-4b59-ba62-22f374698b00", "metadata": { "tags": [] }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/g/data/hh5/public/apps/miniconda3/envs/analysis3-23.07/lib/python3.10/site-packages/intake_esm/cat.py:270: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", - " .applymap(type)\n", - "/g/data/hh5/public/apps/miniconda3/envs/analysis3-23.07/lib/python3.10/site-packages/intake_esm/cat.py:270: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", - " .applymap(type)\n", - "/g/data/hh5/public/apps/miniconda3/envs/analysis3-23.07/lib/python3.10/site-packages/intake_esm/cat.py:270: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", - " .applymap(type)\n", - "/g/data/hh5/public/apps/miniconda3/envs/analysis3-23.07/lib/python3.10/site-packages/intake_esm/cat.py:270: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", - " .applymap(type)\n" - ] - }, { "data": { "text/html": [ @@ -352,6 +1481,7 @@ "}\n", "\n", "html[theme=dark],\n", + "html[data-theme=dark],\n", "body[data-theme=dark],\n", "body.vscode-dark {\n", " --xr-font-color0: rgba(255, 255, 255, 1);\n", @@ -684,22 +1814,22 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.DataArray 'aice_m' (time: 3, yt_ocean: 713, xt_ocean: 3600)>\n",
+       "
<xarray.DataArray 'aice_m' (time: 3, yt_ocean: 713, xt_ocean: 3600)> Size: 31MB\n",
        "dask.array<getitem, shape=(3, 713, 3600), dtype=float32, chunksize=(1, 270, 360), chunktype=numpy.ndarray>\n",
        "Coordinates:\n",
-       "    TLON      (yt_ocean, xt_ocean) float32 dask.array<chunksize=(243, 360), meta=np.ndarray>\n",
-       "    TLAT      (yt_ocean, xt_ocean) float32 dask.array<chunksize=(243, 360), meta=np.ndarray>\n",
-       "    ULON      (yt_ocean, xt_ocean) float32 dask.array<chunksize=(243, 360), meta=np.ndarray>\n",
-       "    ULAT      (yt_ocean, xt_ocean) float32 dask.array<chunksize=(243, 360), meta=np.ndarray>\n",
-       "  * time      (time) datetime64[ns] 1977-12-31T12:00:00 ... 1978-02-28T12:00:00\n",
-       "  * xt_ocean  (xt_ocean) float64 -279.9 -279.8 -279.7 ... 79.75 79.85 79.95\n",
-       "  * yt_ocean  (yt_ocean) float64 -79.97 -79.93 -79.88 ... -45.18 -45.11 -45.04\n",
+       "    TLON      (yt_ocean, xt_ocean) float32 10MB dask.array<chunksize=(243, 360), meta=np.ndarray>\n",
+       "    TLAT      (yt_ocean, xt_ocean) float32 10MB dask.array<chunksize=(243, 360), meta=np.ndarray>\n",
+       "    ULON      (yt_ocean, xt_ocean) float32 10MB dask.array<chunksize=(243, 360), meta=np.ndarray>\n",
+       "    ULAT      (yt_ocean, xt_ocean) float32 10MB dask.array<chunksize=(243, 360), meta=np.ndarray>\n",
+       "  * time      (time) datetime64[ns] 24B 1977-12-31T12:00:00 ... 1978-02-28T12...\n",
+       "  * xt_ocean  (xt_ocean) float64 29kB -279.9 -279.8 -279.7 ... 79.75 79.85 79.95\n",
+       "  * yt_ocean  (yt_ocean) float64 6kB -79.97 -79.93 -79.88 ... -45.11 -45.04\n",
        "Attributes:\n",
        "    units:          1\n",
        "    long_name:      ice area  (aggregate)\n",
        "    cell_measures:  area: tarea\n",
        "    cell_methods:   time: mean\n",
-       "    time_rep:       averaged