From 125adfff16c63244cd150613034d92000f9e231e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 6 Jan 2025 19:52:04 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 docs/drift_removal.ipynb                 | 148 ++++++++++------
 docs/postprocessing.ipynb                | 213 +++++++++++++++--------
 docs/regionmask.ipynb                    |  57 ++++--
 docs/tutorial.ipynb                      | 160 ++++++++++-------
 notebooks/add_more_models.ipynb          | 117 ++++++++-----
 notebooks/maintenance_grids.ipynb        | 133 +++++++-------
 notebooks/metric_parse_improvement.ipynb |  38 ++--
 notebooks/parse_area_gn.ipynb            |  20 ++-
 notebooks/testing_various_issues.ipynb   | 111 +++++++-----
 9 files changed, 620 insertions(+), 377 deletions(-)

diff --git a/docs/drift_removal.ipynb b/docs/drift_removal.ipynb
index 1809d51f..db578cdc 100644
--- a/docs/drift_removal.ipynb
+++ b/docs/drift_removal.ipynb
@@ -43,6 +43,7 @@
    ],
    "source": [
     "from dask_gateway import Gateway\n",
+    "\n",
     "g = Gateway()\n",
     "running_clusters = g.list_clusters()\n",
     "print(running_clusters)\n",
@@ -77,6 +78,7 @@
    "source": [
     "from distributed import Client\n",
     "from dask_gateway import GatewayCluster\n",
+    "\n",
     "cluster = GatewayCluster()\n",
     "cluster.scale(30)\n",
     "cluster"
@@ -255,18 +257,22 @@
     }
    ],
    "source": [
-    "zkwargs = {'consolidated':True, 'use_cftime':True}\n",
-    "kwargs = {'zarr_kwargs':zkwargs, 'preprocess':combined_preprocessing, 'aggregate':False}\n",
+    "zkwargs = {\"consolidated\": True, \"use_cftime\": True}\n",
+    "kwargs = {\n",
+    "    \"zarr_kwargs\": zkwargs,\n",
+    "    \"preprocess\": combined_preprocessing,\n",
+    "    \"aggregate\": False,\n",
+    "}\n",
     "\n",
     "col = google_cmip_col()\n",
     "\n",
     "\n",
-    "cat = col.search(source_id='CanESM5-CanOE', variable_id='thetao')\n",
+    "cat = col.search(source_id=\"CanESM5-CanOE\", variable_id=\"thetao\")\n",
     "\n",
     "\n",
-    "ddict_historical = cat.search(experiment_id='historical').to_dataset_dict(**kwargs)\n",
-    "ddict_ssp585 = cat.search(experiment_id='ssp585').to_dataset_dict(**kwargs)\n",
-    "ddict_picontrol = cat.search(experiment_id='piControl').to_dataset_dict(**kwargs)"
+    "ddict_historical = cat.search(experiment_id=\"historical\").to_dataset_dict(**kwargs)\n",
+    "ddict_ssp585 = cat.search(experiment_id=\"ssp585\").to_dataset_dict(**kwargs)\n",
+    "ddict_picontrol = cat.search(experiment_id=\"piControl\").to_dataset_dict(**kwargs)"
    ]
   },
   {
@@ -286,12 +292,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds_control = ddict_picontrol['CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
-    "ds_historical = ddict_historical['CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
-    "ds_ssp585 = ddict_ssp585['ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
+    "ds_control = ddict_picontrol[\n",
+    "    \"CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
+    "]\n",
+    "ds_historical = ddict_historical[\n",
+    "    \"CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
+    "]\n",
+    "ds_ssp585 = ddict_ssp585[\n",
+    "    \"ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
+    "]\n",
     "\n",
     "# Pick a random location in x/y/z space to use as an exmple\n",
-    "roi = {'x':100,'y':220, 'lev':30}"
+    "roi = {\"x\": 100, \"y\": 220, \"lev\": 30}"
    ]
   },
   {
@@ -335,8 +347,8 @@
    ],
    "source": [
     "# ok lets just plot them together\n",
-    "ds_control.isel(**roi).thetao.plot(color='0.5')\n",
-    "ds_historical.isel(**roi).thetao.plot(color='C1')"
+    "ds_control.isel(**roi).thetao.plot(color=\"0.5\")\n",
+    "ds_historical.isel(**roi).thetao.plot(color=\"C1\")"
    ]
   },
   {
@@ -373,7 +385,7 @@
     }
    ],
    "source": [
-    "{k:v for k,v in ds_historical.attrs.items() if 'parent' in k}"
+    "{k: v for k, v in ds_historical.attrs.items() if \"parent\" in k}"
    ]
   },
   {
@@ -428,8 +440,8 @@
    ],
    "source": [
     "# ok lets just plot them together\n",
-    "ds_control_adj.isel(**roi).thetao.plot(color='0.5')\n",
-    "ds_historical_adj.isel(**roi).thetao.plot(color='C1')"
+    "ds_control_adj.isel(**roi).thetao.plot(color=\"0.5\")\n",
+    "ds_historical_adj.isel(**roi).thetao.plot(color=\"C1\")"
    ]
   },
   {
@@ -471,8 +483,8 @@
    ],
    "source": [
     "# ok lets just plot them together\n",
-    "ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
-    "ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
+    "ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
+    "ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
    ]
   },
   {
@@ -491,8 +503,9 @@
    "outputs": [],
    "source": [
     "from xmip.drift_removal import replace_time\n",
+    "\n",
     "# with the defaults it will just replace the dates with new ones which have time stamps at the beginning of the month.\n",
-    "ds_historical_adj = replace_time(ds_historical_adj) "
+    "ds_historical_adj = replace_time(ds_historical_adj)"
    ]
   },
   {
@@ -526,8 +539,8 @@
    ],
    "source": [
     "# ok lets just plot them together again\n",
-    "ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
-    "ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
+    "ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
+    "ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
    ]
   },
   {
@@ -564,7 +577,7 @@
    ],
    "source": [
     "for name, ds in ddict_historical.items():\n",
-    "    print(name, ds.attrs['branch_time_in_parent'])"
+    "    print(name, ds.attrs[\"branch_time_in_parent\"])"
    ]
   },
   {
@@ -598,14 +611,18 @@
     "# replace the timestamp with the first of the month for the control run and plot\n",
     "# we will also average the data yearly to remove some of the visual noise\n",
     "\n",
-    "plt.figure(figsize=[12,4])\n",
-    "replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,150*4)).plot(color='0.5')\n",
+    "plt.figure(figsize=[12, 4])\n",
+    "replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(\n",
+    "    time=slice(0, 150 * 4)\n",
+    ").plot(color=\"0.5\")\n",
     "\n",
-    "# now we loop through all the historical members, adjust the time and plot them in the same way, \n",
+    "# now we loop through all the historical members, adjust the time and plot them in the same way,\n",
     "# but only for the first 20 years\n",
     "for name, ds in ddict_historical.items():\n",
-    "    _, ds_adj = unify_time(ds_control, ds, adjust_to='parent')\n",
-    "    ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,30*4)).plot(color='C1')"
+    "    _, ds_adj = unify_time(ds_control, ds, adjust_to=\"parent\")\n",
+    "    ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0, 30 * 4)).plot(\n",
+    "        color=\"C1\"\n",
+    "    )"
    ]
   },
   {
@@ -654,9 +671,10 @@
     "# setting up the scratch bucket\n",
     "import os\n",
     "import fsspec\n",
-    "PANGEO_SCRATCH = os.environ['PANGEO_SCRATCH']+'cmip6_pp_demo'\n",
-    "path = f'{PANGEO_SCRATCH}/test_rechunked.zarr'\n",
-    "temp_path = f'{PANGEO_SCRATCH}/test_rechunked_temp.zarr'\n",
+    "\n",
+    "PANGEO_SCRATCH = os.environ[\"PANGEO_SCRATCH\"] + \"cmip6_pp_demo\"\n",
+    "path = f\"{PANGEO_SCRATCH}/test_rechunked.zarr\"\n",
+    "temp_path = f\"{PANGEO_SCRATCH}/test_rechunked_temp.zarr\"\n",
     "mapper = fsspec.get_mapper(path)\n",
     "mapper_temp = fsspec.get_mapper(temp_path)"
    ]
@@ -1506,27 +1524,30 @@
    "source": [
     "if not mapper.fs.exists(path):\n",
     "    # recompute the rechunked data into the scratch bucket (is only triggered when the temporary store was erased)\n",
-    "    \n",
+    "\n",
     "    # Remove the temp store if for some reason that still exists\n",
     "    if mapper.fs.exists(temp_path):\n",
     "        mapper.fs.rm(temp_path, recursive=True)\n",
     "    from rechunker import rechunk\n",
+    "\n",
     "    target_chunks = {\n",
-    "        'thetao': {'time':6012, 'lev':1, 'x':3, 'y':291},\n",
-    "        'x': {'x':3},\n",
-    "        'y': {'y':291},\n",
-    "        'lat': {'x':3, 'y':291},\n",
-    "        'lev': {'lev':1},\n",
-    "        'lon': {'x':3, 'y':291},\n",
-    "        'time': {'time':6012}, \n",
+    "        \"thetao\": {\"time\": 6012, \"lev\": 1, \"x\": 3, \"y\": 291},\n",
+    "        \"x\": {\"x\": 3},\n",
+    "        \"y\": {\"y\": 291},\n",
+    "        \"lat\": {\"x\": 3, \"y\": 291},\n",
+    "        \"lev\": {\"lev\": 1},\n",
+    "        \"lon\": {\"x\": 3, \"y\": 291},\n",
+    "        \"time\": {\"time\": 6012},\n",
     "    }\n",
-    "    max_mem = '1GB'\n",
+    "    max_mem = \"1GB\"\n",
     "\n",
-    "    array_plan = rechunk(ds_control[['thetao']], target_chunks, max_mem, mapper, temp_store=mapper_temp)\n",
+    "    array_plan = rechunk(\n",
+    "        ds_control[[\"thetao\"]], target_chunks, max_mem, mapper, temp_store=mapper_temp\n",
+    "    )\n",
     "    array_plan.execute(retries=10)\n",
-    "    \n",
+    "\n",
     "ds_control_rechunked = xr.open_zarr(mapper, use_cftime=True)\n",
-    "ds_control_rechunked    "
+    "ds_control_rechunked"
    ]
   },
   {
@@ -2061,8 +2082,8 @@
     }
    ],
    "source": [
-    "drift = calculate_drift(ds_control_rechunked, ds_historical, 'thetao') \n",
-    "drift = drift.load() # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
+    "drift = calculate_drift(ds_control_rechunked, ds_historical, \"thetao\")\n",
+    "drift = drift.load()  # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
     "drift"
    ]
   },
@@ -2116,13 +2137,18 @@
    "source": [
     "start = drift.trend_time_range.isel(bnds=0).data.tolist()\n",
     "stop = drift.trend_time_range.isel(bnds=1).data.tolist()\n",
-    "time = xr.cftime_range(start, stop, freq='1MS')\n",
+    "time = xr.cftime_range(start, stop, freq=\"1MS\")\n",
     "\n",
     "# cut the control it to the time over which the trend was calculated\n",
     "ds_control_cut = ds_control_rechunked.sel(time=slice(start, stop))\n",
     "\n",
     "# use the linear slope from the same point to construct a trendline\n",
-    "trendline = xr.DataArray((np.arange(len(time)) * drift.thetao.isel(**roi).data) + ds_control_cut.thetao.isel(**roi, time=0).data, dims=['time'], coords={'time':time})"
+    "trendline = xr.DataArray(\n",
+    "    (np.arange(len(time)) * drift.thetao.isel(**roi).data)\n",
+    "    + ds_control_cut.thetao.isel(**roi, time=0).data,\n",
+    "    dims=[\"time\"],\n",
+    "    coords={\"time\": time},\n",
+    ")"
    ]
   },
   {
@@ -2205,7 +2231,9 @@
     }
    ],
    "source": [
-    "control_detrended = remove_trend(ds_control, drift, 'thetao', ref_date=str(ds_control.time.data[0]))\n",
+    "control_detrended = remove_trend(\n",
+    "    ds_control, drift, \"thetao\", ref_date=str(ds_control.time.data[0])\n",
+    ")\n",
     "control_detrended.isel(**roi).plot()"
    ]
   },
@@ -2247,7 +2275,9 @@
     }
    ],
    "source": [
-    "ds_historical_dedrifted = remove_trend(ds_historical, drift, 'thetao', ref_date=str(ds_historical.time.data[0]))\n",
+    "ds_historical_dedrifted = remove_trend(\n",
+    "    ds_historical, drift, \"thetao\", ref_date=str(ds_historical.time.data[0])\n",
+    ")\n",
     "ds_historical_dedrifted.isel(**roi).plot()"
    ]
   },
@@ -2866,7 +2896,7 @@
     }
    ],
    "source": [
-    "ds_historical_dedrifted.attrs['drift_removed']"
+    "ds_historical_dedrifted.attrs[\"drift_removed\"]"
    ]
   },
   {
@@ -2930,10 +2960,10 @@
     "ds_ssp585_dedrifted = remove_trend(\n",
     "    ds_ssp585,\n",
     "    drift,\n",
-    "    'thetao',\n",
-    "    ref_date=str(ds_historical.time.data[0]) \n",
-    "    # Note that the ref_date is still the first time point of the *historical*run. \n",
-    "    # This ensures that the scenario is treated as an extension of the historical \n",
+    "    \"thetao\",\n",
+    "    ref_date=str(ds_historical.time.data[0]),\n",
+    "    # Note that the ref_date is still the first time point of the *historical* run.\n",
+    "    # This ensures that the scenario is treated as an extension of the historical\n",
     "    # run and the offset is calculated appropriately\n",
     ")"
    ]
@@ -2968,10 +2998,16 @@
     }
    ],
    "source": [
-    "ds_historical.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0', label='raw data')\n",
-    "ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0')\n",
-    "ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1', label='control drift removed')\n",
-    "ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1')"
+    "ds_historical.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(\n",
+    "    color=\"C0\", label=\"raw data\"\n",
+    ")\n",
+    "ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(color=\"C0\")\n",
+    "ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
+    "    color=\"C1\", label=\"control drift removed\"\n",
+    ")\n",
+    "ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
+    "    color=\"C1\"\n",
+    ")"
    ]
   },
   {
diff --git a/docs/postprocessing.ipynb b/docs/postprocessing.ipynb
index 26538964..91ccc41f 100644
--- a/docs/postprocessing.ipynb
+++ b/docs/postprocessing.ipynb
@@ -98,23 +98,20 @@
     "from xmip.preprocessing import combined_preprocessing\n",
     "\n",
     "col = google_cmip_col()\n",
-    "experiment_id='historical'\n",
-    "source_id = ['CanESM5-CanOE', 'GFDL-ESM4']\n",
+    "experiment_id = \"historical\"\n",
+    "source_id = [\"CanESM5-CanOE\", \"GFDL-ESM4\"]\n",
     "kwargs = {\n",
-    "    'zarr_kwargs':{\n",
-    "        'consolidated':True,\n",
-    "        'use_cftime':True\n",
-    "    },\n",
-    "    'aggregate':False,\n",
-    "    'preprocess':combined_preprocessing\n",
+    "    \"zarr_kwargs\": {\"consolidated\": True, \"use_cftime\": True},\n",
+    "    \"aggregate\": False,\n",
+    "    \"preprocess\": combined_preprocessing,\n",
     "}\n",
     "\n",
     "cat_data = col.search(\n",
     "    source_id=source_id,\n",
     "    experiment_id=experiment_id,\n",
-    "    grid_label='gn',\n",
-    "    table_id='Omon',\n",
-    "    variable_id=['tos', 'zos']\n",
+    "    grid_label=\"gn\",\n",
+    "    table_id=\"Omon\",\n",
+    "    variable_id=[\"tos\", \"zos\"],\n",
     ")\n",
     "ddict = cat_data.to_dataset_dict(**kwargs)\n",
     "list(ddict.keys())"
@@ -1348,7 +1345,7 @@
    ],
    "source": [
     "# check if the merging worked\n",
-    "ddict_merged['GFDL-ESM4.gn.historical.Omon.r2i1p1f1']"
+    "ddict_merged[\"GFDL-ESM4.gn.historical.Omon.r2i1p1f1\"]"
    ]
   },
   {
@@ -2311,7 +2308,7 @@
     "\n",
     "ddict_concat = concat_members(ddict_merged)\n",
     "print(list(ddict_concat.keys()))\n",
-    "ddict_concat['GFDL-ESM4.gn.historical.Omon']"
+    "ddict_concat[\"GFDL-ESM4.gn.historical.Omon\"]"
    ]
   },
   {
@@ -2344,12 +2341,13 @@
    ],
    "source": [
     "import matplotlib.pyplot as plt\n",
-    "plt.figure(figsize=[8,4])\n",
+    "\n",
+    "plt.figure(figsize=[8, 4])\n",
     "for i, (name, ds) in enumerate(ddict_concat.items()):\n",
-    "    data = ds.tos.where(ds.zos<0).mean(['x','y'])\n",
-    "    plt.subplot(2,1,i+1)\n",
-    "    data.coarsen(time=12*5).mean().plot(hue='member_id')\n",
-    "    plt.gca().set_title(ds.attrs['source_id'])"
+    "    data = ds.tos.where(ds.zos < 0).mean([\"x\", \"y\"])\n",
+    "    plt.subplot(2, 1, i + 1)\n",
+    "    data.coarsen(time=12 * 5).mean().plot(hue=\"member_id\")\n",
+    "    plt.gca().set_title(ds.attrs[\"source_id\"])"
    ]
   },
   {
@@ -3541,9 +3539,10 @@
    ],
    "source": [
     "from xmip.postprocessing import pick_first_member\n",
+    "\n",
     "ddict_single_member = pick_first_member(ddict_merged)\n",
     "print(list(ddict_single_member.keys()))\n",
-    "ddict_single_member['GFDL-ESM4.gn.historical.Omon']"
+    "ddict_single_member[\"GFDL-ESM4.gn.historical.Omon\"]"
    ]
   },
   {
@@ -4500,15 +4499,17 @@
    "source": [
     "from xmip.postprocessing import combine_datasets\n",
     "\n",
+    "\n",
     "def pick_first_member(ds_list, **kwargs):\n",
     "    return ds_list[0]\n",
     "\n",
+    "\n",
     "ddict_new = combine_datasets(\n",
     "    ddict_merged,\n",
     "    pick_first_member,\n",
-    "    match_attrs=['source_id', 'grid_label', 'experiment_id', 'table_id']\n",
+    "    match_attrs=[\"source_id\", \"grid_label\", \"experiment_id\", \"table_id\"],\n",
     ")\n",
-    "ddict_new['CanESM5-CanOE.gn.historical.Omon']"
+    "ddict_new[\"CanESM5-CanOE.gn.historical.Omon\"]"
    ]
   },
   {
@@ -4635,9 +4636,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from xmip.postprocessing import (\n",
-    "    interpolate_grid_label\n",
-    ")"
+    "from xmip.postprocessing import interpolate_grid_label"
    ]
   },
   {
@@ -4656,7 +4655,7 @@
     }
    ],
    "source": [
-    "combined_grids_dict = interpolate_grid_label(ddict, target_grid_label='gn')"
+    "combined_grids_dict = interpolate_grid_label(ddict, target_grid_label=\"gn\")"
    ]
   },
   {
@@ -5724,7 +5723,7 @@
     }
    ],
    "source": [
-    "combined_grids_dict['GFDL-ESM4.historical.Omon.r1i1p1f1']"
+    "combined_grids_dict[\"GFDL-ESM4.historical.Omon.r1i1p1f1\"]"
    ]
   },
   {
@@ -6686,7 +6685,9 @@
     }
    ],
    "source": [
-    "ddict['CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Omon/thetao/gn/v20190726/.nan.20190726']"
+    "ddict[\n",
+    "    \"CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Omon/thetao/gn/v20190726/.nan.20190726\"\n",
+    "]"
    ]
   },
   {
@@ -6793,13 +6794,13 @@
     "\n",
     "for name, ds in combined_grids_dict.items():\n",
     "    ds = ds.isel(lev=0, time=0)\n",
-    "    plt.figure(figsize=[10,12])\n",
-    "    plt.subplot(2,1,1)\n",
+    "    plt.figure(figsize=[10, 12])\n",
+    "    plt.subplot(2, 1, 1)\n",
     "    ds.thetao.plot()\n",
-    "    plt.title(name+' thetao')\n",
-    "    plt.subplot(2,1,2)\n",
+    "    plt.title(name + \" thetao\")\n",
+    "    plt.subplot(2, 1, 2)\n",
     "    ds.o2.plot()\n",
-    "    plt.title(name+' o2')\n",
+    "    plt.title(name + \" o2\")\n",
     "    plt.show()"
    ]
   },
@@ -6840,7 +6841,8 @@
    "source": [
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
-    "plt.rcParams['figure.figsize'] = 12, 6\n",
+    "\n",
+    "plt.rcParams[\"figure.figsize\"] = 12, 6\n",
     "%config InlineBackend.figure_format = 'retina'"
    ]
   },
@@ -6902,11 +6904,17 @@
     "from xmip.preprocessing import combined_preprocessing\n",
     "\n",
     "col = google_cmip_col()\n",
-    "experiment_id='historical'\n",
-    "source_id = 'MPI-ESM1-2-LR'\n",
-    "kwargs = {'zarr_kwargs':{'consolidated':True, 'use_cftime':True}, 'aggregate':False, 'preprocess':combined_preprocessing}\n",
+    "experiment_id = \"historical\"\n",
+    "source_id = \"MPI-ESM1-2-LR\"\n",
+    "kwargs = {\n",
+    "    \"zarr_kwargs\": {\"consolidated\": True, \"use_cftime\": True},\n",
+    "    \"aggregate\": False,\n",
+    "    \"preprocess\": combined_preprocessing,\n",
+    "}\n",
     "\n",
-    "cat_data = col.search(source_id=source_id, experiment_id=experiment_id, variable_id='tos')\n",
+    "cat_data = col.search(\n",
+    "    source_id=source_id, experiment_id=experiment_id, variable_id=\"tos\"\n",
+    ")\n",
     "ddict = cat_data.to_dataset_dict(**kwargs)"
    ]
   },
@@ -6968,7 +6976,9 @@
     }
    ],
    "source": [
-    "cat_metric = col.search(source_id=source_id, experiment_id=experiment_id, variable_id='areacello')\n",
+    "cat_metric = col.search(\n",
+    "    source_id=source_id, experiment_id=experiment_id, variable_id=\"areacello\"\n",
+    ")\n",
     "ddict_metrics = cat_metric.to_dataset_dict(**kwargs)"
    ]
   },
@@ -6988,7 +6998,8 @@
    "outputs": [],
    "source": [
     "from xmip.postprocessing import match_metrics\n",
-    "ddict_matched = match_metrics(ddict, ddict_metrics, ['areacello'])"
+    "\n",
+    "ddict_matched = match_metrics(ddict, ddict_metrics, [\"areacello\"])"
    ]
   },
   {
@@ -8014,7 +8025,9 @@
     }
    ],
    "source": [
-    "ddict_matched['CMIP.MPI-M.MPI-ESM1-2-LR.historical.r1i1p1f1.Oday.tos.gn.gs://cmip6/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Oday/tos/gn/v20190710/.nan.20190710']"
+    "ddict_matched[\n",
+    "    \"CMIP.MPI-M.MPI-ESM1-2-LR.historical.r1i1p1f1.Oday.tos.gn.gs://cmip6/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Oday/tos/gn/v20190710/.nan.20190710\"\n",
+    "]"
    ]
   },
   {
@@ -8063,9 +8076,11 @@
     "for ds in ddict_matched.values():\n",
     "    # calculate the weighted average over the surface level temperatures\n",
     "    area = ds.areacello.fillna(0)\n",
-    "    da = ds.tos.isel(time=slice(0,240)).weighted(area).mean(['x','y']).squeeze().load()\n",
-    "    da.plot(ax=ax, label=ds.attrs['variant_label'])\n",
-    "ax.legend(bbox_to_anchor=(1, 1), loc='upper left')"
+    "    da = (\n",
+    "        ds.tos.isel(time=slice(0, 240)).weighted(area).mean([\"x\", \"y\"]).squeeze().load()\n",
+    "    )\n",
+    "    da.plot(ax=ax, label=ds.attrs[\"variant_label\"])\n",
+    "ax.legend(bbox_to_anchor=(1, 1), loc=\"upper left\")"
    ]
   },
   {
@@ -8162,13 +8177,19 @@
     }
    ],
    "source": [
-    "cat_data = col.search(source_id=source_id, experiment_id=experiment_id, variable_id='thetao')\n",
+    "cat_data = col.search(\n",
+    "    source_id=source_id, experiment_id=experiment_id, variable_id=\"thetao\"\n",
+    ")\n",
     "ddict = cat_data.to_dataset_dict(**kwargs)\n",
     "\n",
-    "cat_metric = col.search(source_id=source_id, variable_id=['areacello', 'thkcello'], experiment_id='historical')\n",
+    "cat_metric = col.search(\n",
+    "    source_id=source_id,\n",
+    "    variable_id=[\"areacello\", \"thkcello\"],\n",
+    "    experiment_id=\"historical\",\n",
+    ")\n",
     "ddict_metrics = cat_metric.to_dataset_dict(**kwargs)\n",
     "\n",
-    "# Matching \n"
+    "# Matching"
    ]
   },
   {
@@ -9351,8 +9372,10 @@
     }
    ],
    "source": [
-    "ddict_matched_again = match_metrics(ddict, ddict_metrics, ['areacello', 'thkcello'])\n",
-    "ddict_matched_again['CMIP.MPI-M.MPI-ESM1-2-LR.historical.r2i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r2i1p1f1/Omon/thetao/gn/v20190710/.nan.20190710']"
+    "ddict_matched_again = match_metrics(ddict, ddict_metrics, [\"areacello\", \"thkcello\"])\n",
+    "ddict_matched_again[\n",
+    "    \"CMIP.MPI-M.MPI-ESM1-2-LR.historical.r2i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r2i1p1f1/Omon/thetao/gn/v20190710/.nan.20190710\"\n",
+    "]"
    ]
   },
   {
@@ -9400,10 +9423,16 @@
     "fig, ax = plt.subplots()\n",
     "for i, ds in enumerate(ddict_matched_again.values()):\n",
     "    # calculate the volume weighted mean ocean temperature\n",
-    "    vol = (ds.areacello * ds.thkcello)\n",
-    "    da = ds.thetao.isel(time=slice(-240, None)).weighted(vol.fillna(0)).mean(['x','y', 'lev']).squeeze().load()\n",
-    "    da.plot(ax=ax, color=f'C{i}', label=ds.attrs['variant_label'])\n",
-    "ax.legend(bbox_to_anchor=(1, 1), loc='upper left')"
+    "    vol = ds.areacello * ds.thkcello\n",
+    "    da = (\n",
+    "        ds.thetao.isel(time=slice(-240, None))\n",
+    "        .weighted(vol.fillna(0))\n",
+    "        .mean([\"x\", \"y\", \"lev\"])\n",
+    "        .squeeze()\n",
+    "        .load()\n",
+    "    )\n",
+    "    da.plot(ax=ax, color=f\"C{i}\", label=ds.attrs[\"variant_label\"])\n",
+    "ax.legend(bbox_to_anchor=(1, 1), loc=\"upper left\")"
    ]
   },
   {
@@ -9530,9 +9559,20 @@
     }
    ],
    "source": [
-    "cat_data = col.search(source_id='FGOALS-f3-L', variable_id='thetao', experiment_id=experiment_id, grid_label='gn', table_id='Omon')\n",
+    "cat_data = col.search(\n",
+    "    source_id=\"FGOALS-f3-L\",\n",
+    "    variable_id=\"thetao\",\n",
+    "    experiment_id=experiment_id,\n",
+    "    grid_label=\"gn\",\n",
+    "    table_id=\"Omon\",\n",
+    ")\n",
     "ddict = cat_data.to_dataset_dict(**kwargs)\n",
-    "cat_metric = col.search(source_id='FGOALS-f3-L', variable_id='areacello', experiment_id='historical', grid_label='gn')\n",
+    "cat_metric = col.search(\n",
+    "    source_id=\"FGOALS-f3-L\",\n",
+    "    variable_id=\"areacello\",\n",
+    "    experiment_id=\"historical\",\n",
+    "    grid_label=\"gn\",\n",
+    ")\n",
     "ddict_metrics = cat_metric.to_dataset_dict(**kwargs)"
    ]
   },
@@ -9597,7 +9637,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ddict_matched = match_metrics(ddict, ddict_metrics, ['areacello'])"
+    "ddict_matched = match_metrics(ddict, ddict_metrics, [\"areacello\"])"
    ]
   },
   {
@@ -10411,7 +10451,9 @@
     }
    ],
    "source": [
-    "ds = ddict_matched['CMIP.CAS.FGOALS-f3-L.historical.r2i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r2i1p1f1/Omon/thetao/gn/v20191008/.nan.20191008']\n",
+    "ds = ddict_matched[\n",
+    "    \"CMIP.CAS.FGOALS-f3-L.historical.r2i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r2i1p1f1/Omon/thetao/gn/v20191008/.nan.20191008\"\n",
+    "]\n",
     "ds"
    ]
   },
@@ -11246,9 +11288,16 @@
     }
    ],
    "source": [
-    "ddict_matched_strict = match_metrics(ddict, ddict_metrics, ['areacello'], match_attrs=['source_id', 'grid_label', 'variant_label'])\n",
+    "ddict_matched_strict = match_metrics(\n",
+    "    ddict,\n",
+    "    ddict_metrics,\n",
+    "    [\"areacello\"],\n",
+    "    match_attrs=[\"source_id\", \"grid_label\", \"variant_label\"],\n",
+    ")\n",
     "\n",
-    "ds_strict = ddict_matched_strict['CMIP.CAS.FGOALS-f3-L.historical.r2i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r2i1p1f1/Omon/thetao/gn/v20191008/.nan.20191008']\n",
+    "ds_strict = ddict_matched_strict[\n",
+    "    \"CMIP.CAS.FGOALS-f3-L.historical.r2i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r2i1p1f1/Omon/thetao/gn/v20191008/.nan.20191008\"\n",
+    "]\n",
     "ds_strict"
    ]
   },
@@ -12068,7 +12117,9 @@
     }
    ],
    "source": [
-    "ds_strict_matched = ddict_matched_strict['CMIP.CAS.FGOALS-f3-L.historical.r1i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r1i1p1f1/Omon/thetao/gn/v20190822/.nan.20190822']\n",
+    "ds_strict_matched = ddict_matched_strict[\n",
+    "    \"CMIP.CAS.FGOALS-f3-L.historical.r1i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r1i1p1f1/Omon/thetao/gn/v20190822/.nan.20190822\"\n",
+    "]\n",
     "ds_strict_matched"
    ]
   },
@@ -12210,24 +12261,36 @@
     }
    ],
    "source": [
-    "experiment_id = 'ssp585'\n",
-    "cat_data = col.search(variable_id='tos', experiment_id=experiment_id, grid_label='gn', table_id='Omon')\n",
+    "experiment_id = \"ssp585\"\n",
+    "cat_data = col.search(\n",
+    "    variable_id=\"tos\", experiment_id=experiment_id, grid_label=\"gn\", table_id=\"Omon\"\n",
+    ")\n",
     "\n",
     "##### remove a single store\n",
     "# see https://github.com/intake/intake-esm/issues/246 for details on how to modify the dataframe\n",
     "df = cat_data.df\n",
-    "drop_idx = cat_data.df.index[cat_data.df['zstore'].str.contains('ScenarioMIP.CCCma.CanESM5.ssp585.r9i1p1f1.Omon.tos.gn')]\n",
+    "drop_idx = cat_data.df.index[\n",
+    "    cat_data.df[\"zstore\"].str.contains(\n",
+    "        \"ScenarioMIP.CCCma.CanESM5.ssp585.r9i1p1f1.Omon.tos.gn\"\n",
+    "    )\n",
+    "]\n",
     "df = df.drop(drop_idx)\n",
     "cat_data = cat_data.from_df(df=df, esmcol_data=cat_data.esmcol_data)\n",
     "#####\n",
     "\n",
     "ddict = cat_data.to_dataset_dict(**kwargs)\n",
-    "cat_metric = col.search(variable_id='areacello', experiment_id=experiment_id, grid_label='gn')\n",
+    "cat_metric = col.search(\n",
+    "    variable_id=\"areacello\", experiment_id=experiment_id, grid_label=\"gn\"\n",
+    ")\n",
     "ddict_metrics = cat_metric.to_dataset_dict(**kwargs)\n",
-    "ddict_matched = match_metrics(ddict, ddict_metrics, ['areacello'], print_statistics=True)\n",
+    "ddict_matched = match_metrics(\n",
+    "    ddict, ddict_metrics, [\"areacello\"], print_statistics=True\n",
+    ")\n",
     "\n",
     "# remove the datasets where the parsing was unsuccesful\n",
-    "ddict_matched_filtered = {k:ds for k,ds in ddict_matched.items() if 'areacello' in ds.variables}"
+    "ddict_matched_filtered = {\n",
+    "    k: ds for k, ds in ddict_matched.items() if \"areacello\" in ds.variables\n",
+    "}"
    ]
   },
   {
@@ -12264,19 +12327,25 @@
     }
    ],
    "source": [
-    "models = np.sort(cat_metric.df['source_id'].unique())\n",
+    "models = np.sort(cat_metric.df[\"source_id\"].unique())\n",
     "fig, axarr = plt.subplots(ncols=6, nrows=5, figsize=[16, 8], sharex=True, sharey=True)\n",
     "for model, ax in zip(models, axarr.flat):\n",
-    "    ddict_model = {k:ds for k,ds in ddict_matched_filtered.items() if model in k}\n",
+    "    ddict_model = {k: ds for k, ds in ddict_matched_filtered.items() if model in k}\n",
     "    for i, ds in enumerate(ddict_model.values()):\n",
     "        pass\n",
     "        # calculate the area weighted mean surface ocean temperature\n",
-    "        da = ds.tos.sel(time=slice('2000', '2100')).weighted(ds.areacello.fillna(0)).mean(['x','y', 'lev']).squeeze().load()\n",
+    "        da = (\n",
+    "            ds.tos.sel(time=slice(\"2000\", \"2100\"))\n",
+    "            .weighted(ds.areacello.fillna(0))\n",
+    "            .mean([\"x\", \"y\", \"lev\"])\n",
+    "            .squeeze()\n",
+    "            .load()\n",
+    "        )\n",
     "        # resample to 3month averages\n",
-    "        da = da.resample(time='3MS').mean()\n",
-    "        da.plot(ax=ax, color=f'C{1}', label=ds.attrs['variant_label'], alpha=0.5)\n",
-    "    ax.text(0.03,0.97,model,ha='left',va='top', transform=ax.transAxes)\n",
-    "    ax.set_xlabel('')\n",
+    "        da = da.resample(time=\"3MS\").mean()\n",
+    "        da.plot(ax=ax, color=f\"C{1}\", label=ds.attrs[\"variant_label\"], alpha=0.5)\n",
+    "    ax.text(0.03, 0.97, model, ha=\"left\", va=\"top\", transform=ax.transAxes)\n",
+    "    ax.set_xlabel(\"\")\n",
     "    ax.grid()\n",
     "fig.subplots_adjust(hspace=0, wspace=0)"
    ]
diff --git a/docs/regionmask.ipynb b/docs/regionmask.ipynb
index 038837aa..e2256810 100644
--- a/docs/regionmask.ipynb
+++ b/docs/regionmask.ipynb
@@ -87,7 +87,6 @@
     "import intake\n",
     "import matplotlib.pyplot as plt\n",
     "from xmip.preprocessing import combined_preprocessing\n",
-    "import xarray as xr\n",
     "import numpy as np"
    ]
   },
@@ -133,12 +132,32 @@
     "# import example cloud datasets\n",
     "col_url = \"https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json\"\n",
     "col = intake.open_esm_datastore(col_url)\n",
-    "cat = col.search(source_id=['CAMS-CSM1-0', 'CNRM-CM6-1', 'CNRM-ESM2-1', 'ACCESS-CM2', 'ACCESS-ESM1-5', 'EC-Earth3-Veg',\n",
-    "                            'MIROC-ES2L', 'MIROC6', 'HadGEM3-GC31-LL', 'UKESM1-0-LL', 'MPI-ESM1-2-HR', 'MRI-ESM2-0',\n",
-    "                            'NorCPM1', 'GFDL-CM4', 'GFDL-ESM4', 'NESM3'],\n",
-    "                 experiment_id='historical', variable_id='thetao')\n",
-    "data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times': False},\n",
-    "                                preprocess=combined_preprocessing)"
+    "cat = col.search(\n",
+    "    source_id=[\n",
+    "        \"CAMS-CSM1-0\",\n",
+    "        \"CNRM-CM6-1\",\n",
+    "        \"CNRM-ESM2-1\",\n",
+    "        \"ACCESS-CM2\",\n",
+    "        \"ACCESS-ESM1-5\",\n",
+    "        \"EC-Earth3-Veg\",\n",
+    "        \"MIROC-ES2L\",\n",
+    "        \"MIROC6\",\n",
+    "        \"HadGEM3-GC31-LL\",\n",
+    "        \"UKESM1-0-LL\",\n",
+    "        \"MPI-ESM1-2-HR\",\n",
+    "        \"MRI-ESM2-0\",\n",
+    "        \"NorCPM1\",\n",
+    "        \"GFDL-CM4\",\n",
+    "        \"GFDL-ESM4\",\n",
+    "        \"NESM3\",\n",
+    "    ],\n",
+    "    experiment_id=\"historical\",\n",
+    "    variable_id=\"thetao\",\n",
+    ")\n",
+    "data_dict = cat.to_dataset_dict(\n",
+    "    zarr_kwargs={\"consolidated\": True, \"decode_times\": False},\n",
+    "    preprocess=combined_preprocessing,\n",
+    ")"
    ]
   },
   {
@@ -445,23 +464,29 @@
     "import cartopy.crs as ccrs\n",
     "\n",
     "for k, ds in data_dict.items():\n",
-    "    if 'lev' in ds.dims:\n",
-    "        model = ds.attrs['source_id']\n",
-    "        if 'member_id' in ds.dims:\n",
+    "    if \"lev\" in ds.dims:\n",
+    "        model = ds.attrs[\"source_id\"]\n",
+    "        if \"member_id\" in ds.dims:\n",
     "            ds = ds.isel(member_id=0)\n",
     "        ds = ds.thetao.isel(time=0, lev=0).squeeze()\n",
     "\n",
-    "        mask = merged_mask(basins,ds)\n",
+    "        mask = merged_mask(basins, ds)\n",
     "\n",
-    "        kwargs = dict(x='lon', y='lat',transform = ccrs.PlateCarree(), infer_intervals=False)\n",
-    "        fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=[20,2], subplot_kw={'projection':ccrs.Robinson(190)})\n",
+    "        kwargs = dict(\n",
+    "            x=\"lon\", y=\"lat\", transform=ccrs.PlateCarree(), infer_intervals=False\n",
+    "        )\n",
+    "        fig, (ax1, ax2, ax3) = plt.subplots(\n",
+    "            ncols=3, figsize=[20, 2], subplot_kw={\"projection\": ccrs.Robinson(190)}\n",
+    "        )\n",
     "        ds.plot(ax=ax1, **kwargs)\n",
     "        ax1.set_title(f\"Raw field {model}\")\n",
-    "        \n",
-    "        ds_masked = ds.where(np.logical_or(np.logical_or(mask == 2, mask==3),mask==4)) # Pacific + Maritime Continent\n",
+    "\n",
+    "        ds_masked = ds.where(\n",
+    "            np.logical_or(np.logical_or(mask == 2, mask == 3), mask == 4)\n",
+    "        )  # Pacific + Maritime Continent\n",
     "        ds_masked.plot(ax=ax2, **kwargs)\n",
     "        ax2.set_title(f\"Masked Pacific {model}\")\n",
-    "        mask.plot(ax=ax3, cmap='tab20', vmin=0, vmax=19, **kwargs)\n",
+    "        mask.plot(ax=ax3, cmap=\"tab20\", vmin=0, vmax=19, **kwargs)\n",
     "        ax3.set_title(f\"Full Mask {model}\")\n",
     "        for ax in [ax1, ax2, ax3]:\n",
     "            ax.coastlines()"
diff --git a/docs/tutorial.ipynb b/docs/tutorial.ipynb
index c65b259e..72bed1e1 100644
--- a/docs/tutorial.ipynb
+++ b/docs/tutorial.ipynb
@@ -30,6 +30,7 @@
     "import matplotlib.pyplot as plt\n",
     "import intake\n",
     "import dask\n",
+    "\n",
     "%matplotlib inline"
    ]
   },
@@ -103,16 +104,19 @@
    ],
    "source": [
     "# load a few models to illustrate the problem\n",
-    "query = dict(experiment_id=['piControl'], table_id='Oyr', \n",
-    "             variable_id='o2', grid_label=['gn', 'gr'],\n",
-    "             source_id=['IPSL-CM6A-LR', 'CanESM5', 'GFDL-ESM4']\n",
-    "            )\n",
+    "query = dict(\n",
+    "    experiment_id=[\"piControl\"],\n",
+    "    table_id=\"Oyr\",\n",
+    "    variable_id=\"o2\",\n",
+    "    grid_label=[\"gn\", \"gr\"],\n",
+    "    source_id=[\"IPSL-CM6A-LR\", \"CanESM5\", \"GFDL-ESM4\"],\n",
+    ")\n",
     "cat = col.search(**query)\n",
     "\n",
-    "cat.df['source_id'].unique()\n",
-    "z_kwargs = {'consolidated': True, 'decode_times':False}\n",
-    "with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n",
-    "    dset_dict = cat.to_dataset_dict(zarr_kwargs=z_kwargs)#"
+    "cat.df[\"source_id\"].unique()\n",
+    "z_kwargs = {\"consolidated\": True, \"decode_times\": False}\n",
+    "with dask.config.set(**{\"array.slicing.split_large_chunks\": True}):\n",
+    "    dset_dict = cat.to_dataset_dict(zarr_kwargs=z_kwargs)  #"
    ]
   },
   {
@@ -208,12 +212,14 @@
     "\n",
     "# load a few models to illustrate the problem\n",
     "cat = col.search(**query)\n",
-    "cat.df['source_id'].unique()\n",
+    "cat.df[\"source_id\"].unique()\n",
     "\n",
     "\n",
     "# pass the preprocessing directly\n",
-    "with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n",
-    "    dset_dict_renamed = cat.to_dataset_dict(zarr_kwargs=z_kwargs, preprocess=rename_cmip6)\n",
+    "with dask.config.set(**{\"array.slicing.split_large_chunks\": True}):\n",
+    "    dset_dict_renamed = cat.to_dataset_dict(\n",
+    "        zarr_kwargs=z_kwargs, preprocess=rename_cmip6\n",
+    "    )\n",
     "\n",
     "for k, ds in dset_dict_renamed.items():\n",
     "    print(k)\n",
@@ -1161,7 +1167,7 @@
    ],
    "source": [
     "# IPSL data is a bit of a mess\n",
-    "ds = dset_dict['CMIP.IPSL.IPSL-CM6A-LR.piControl.Oyr.gn']\n",
+    "ds = dset_dict[\"CMIP.IPSL.IPSL-CM6A-LR.piControl.Oyr.gn\"]\n",
     "ds = rename_cmip6(ds)\n",
     "ds"
    ]
@@ -2114,10 +2120,14 @@
     }
    ],
    "source": [
-    "from xmip.preprocessing import promote_empty_dims, broadcast_lonlat, replace_x_y_nominal_lat_lon\n",
+    "from xmip.preprocessing import (\n",
+    "    promote_empty_dims,\n",
+    "    broadcast_lonlat,\n",
+    "    replace_x_y_nominal_lat_lon,\n",
+    ")\n",
     "\n",
     "# check out the previous datasets\n",
-    "ds = dset_dict_renamed['CMIP.IPSL.IPSL-CM6A-LR.piControl.Oyr.gn']\n",
+    "ds = dset_dict_renamed[\"CMIP.IPSL.IPSL-CM6A-LR.piControl.Oyr.gn\"]\n",
     "ds"
    ]
   },
@@ -3862,7 +3872,7 @@
     }
    ],
    "source": [
-    "ds = dset_dict_renamed['CMIP.NOAA-GFDL.GFDL-ESM4.piControl.Oyr.gr']\n",
+    "ds = dset_dict_renamed[\"CMIP.NOAA-GFDL.GFDL-ESM4.piControl.Oyr.gr\"]\n",
     "ds"
    ]
   },
@@ -4802,7 +4812,7 @@
     }
    ],
    "source": [
-    "ds = dset_dict_renamed['CMIP.CCCma.CanESM5.piControl.Oyr.gn']\n",
+    "ds = dset_dict_renamed[\"CMIP.CCCma.CanESM5.piControl.Oyr.gn\"]\n",
     "print(ds.y.data)\n",
     "\n",
     "ds = replace_x_y_nominal_lat_lon(ds)\n",
@@ -4877,10 +4887,10 @@
     "    ds = replace_x_y_nominal_lat_lon(ds)\n",
     "    return ds\n",
     "\n",
+    "\n",
     "# pass the preprocessing directly\n",
-    "with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n",
-    "    dset_dict_processed1 = cat.to_dataset_dict(zarr_kwargs=z_kwargs,\n",
-    "                                               preprocess=wrapper)"
+    "with dask.config.set(**{\"array.slicing.split_large_chunks\": True}):\n",
+    "    dset_dict_processed1 = cat.to_dataset_dict(zarr_kwargs=z_kwargs, preprocess=wrapper)"
    ]
   },
   {
@@ -4902,11 +4912,11 @@
     }
    ],
    "source": [
-    "fig, axarr = plt.subplots(nrows=3, figsize=[10,15])\n",
+    "fig, axarr = plt.subplots(nrows=3, figsize=[10, 15])\n",
     "for ax, (k, ds) in zip(axarr.flat, dset_dict_processed1.items()):\n",
-    "    if 'member_id' in ds.dims:\n",
+    "    if \"member_id\" in ds.dims:\n",
     "        ds = ds.isel(member_id=-1)\n",
-    "    ds.o2.isel(time=0, lev=0).sel(y=slice(-15,15)).plot(ax=ax)\n",
+    "    ds.o2.isel(time=0, lev=0).sel(y=slice(-15, 15)).plot(ax=ax)\n",
     "    ax.set_title(k)\n",
     "    ax.set_aspect(2)"
    ]
@@ -4982,6 +4992,7 @@
    "source": [
     "from xmip.preprocessing import correct_lon\n",
     "\n",
+    "\n",
     "# same as above\n",
     "def wrapper(ds):\n",
     "    ds = ds.copy()\n",
@@ -4992,10 +5003,10 @@
     "    ds = replace_x_y_nominal_lat_lon(ds)\n",
     "    return ds\n",
     "\n",
+    "\n",
     "# pass the preprocessing directly\n",
-    "with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n",
-    "    dset_dict_processed2 = cat.to_dataset_dict(zarr_kwargs=z_kwargs,\n",
-    "                                               preprocess=wrapper)"
+    "with dask.config.set(**{\"array.slicing.split_large_chunks\": True}):\n",
+    "    dset_dict_processed2 = cat.to_dataset_dict(zarr_kwargs=z_kwargs, preprocess=wrapper)"
    ]
   },
   {
@@ -5017,11 +5028,11 @@
     }
    ],
    "source": [
-    "fig, axarr = plt.subplots(nrows=3, figsize=[10,15])\n",
+    "fig, axarr = plt.subplots(nrows=3, figsize=[10, 15])\n",
     "for ax, (k, ds) in zip(axarr.flat, dset_dict_processed2.items()):\n",
-    "    if 'member_id' in ds.dims:\n",
+    "    if \"member_id\" in ds.dims:\n",
     "        ds = ds.isel(member_id=-1)\n",
-    "    ds.o2.isel(time=0, lev=0).sel(y=slice(-15,15)).plot(ax=ax)\n",
+    "    ds.o2.isel(time=0, lev=0).sel(y=slice(-15, 15)).plot(ax=ax)\n",
     "    ax.set_title(k)\n",
     "    ax.set_aspect(2)"
    ]
@@ -5127,13 +5138,21 @@
    ],
    "source": [
     "from xmip.preprocessing import correct_units\n",
-    "query = dict(experiment_id = ['historical'],variable_id='thetao', grid_label=['gn'],source_id=['CESM2', 'CanESM5'], member_id='r1i1p1f1',\n",
-    "             )\n",
+    "\n",
+    "query = dict(\n",
+    "    experiment_id=[\"historical\"],\n",
+    "    variable_id=\"thetao\",\n",
+    "    grid_label=[\"gn\"],\n",
+    "    source_id=[\"CESM2\", \"CanESM5\"],\n",
+    "    member_id=\"r1i1p1f1\",\n",
+    ")\n",
     "cat = col.search(**query)\n",
     "# raw data read in\n",
     "dset_dict = cat.to_dataset_dict(zarr_kwargs=z_kwargs)\n",
     "# fixed units\n",
-    "dset_dict_fixed_unit = cat.to_dataset_dict(zarr_kwargs=z_kwargs, preprocess=correct_units)"
+    "dset_dict_fixed_unit = cat.to_dataset_dict(\n",
+    "    zarr_kwargs=z_kwargs, preprocess=correct_units\n",
+    ")"
    ]
   },
   {
@@ -5177,9 +5196,9 @@
     }
    ],
    "source": [
-    "dset_dict['CMIP.NCAR.CESM2.historical.Omon.gn'].lev.plot()\n",
+    "dset_dict[\"CMIP.NCAR.CESM2.historical.Omon.gn\"].lev.plot()\n",
     "plt.figure()\n",
-    "dset_dict_fixed_unit['CMIP.NCAR.CESM2.historical.Omon.gn'].lev.plot()"
+    "dset_dict_fixed_unit[\"CMIP.NCAR.CESM2.historical.Omon.gn\"].lev.plot()"
    ]
   },
   {
@@ -5208,9 +5227,9 @@
     }
    ],
    "source": [
-    "fig, axarr = plt.subplots(nrows=2, figsize=[10,10])\n",
+    "fig, axarr = plt.subplots(nrows=2, figsize=[10, 10])\n",
     "for ax, (k, ds) in zip(axarr.flat, dset_dict_fixed_unit.items()):\n",
-    "    ds.thetao.isel(time=0).sel(lev=5000, method='nearest').plot(ax=ax, vmin=-1, vmax=5)\n",
+    "    ds.thetao.isel(time=0).sel(lev=5000, method=\"nearest\").plot(ax=ax, vmin=-1, vmax=5)\n",
     "    ax.set_title(k)"
    ]
   },
@@ -5240,9 +5259,9 @@
     }
    ],
    "source": [
-    "fig, axarr = plt.subplots(nrows=2, figsize=[10,10])\n",
+    "fig, axarr = plt.subplots(nrows=2, figsize=[10, 10])\n",
     "for ax, (k, ds) in zip(axarr.flat, dset_dict.items()):\n",
-    "    ds.thetao.isel(time=0).sel(lev=5000, method='nearest').plot(ax=ax, vmin=-1, vmax=5)\n",
+    "    ds.thetao.isel(time=0).sel(lev=5000, method=\"nearest\").plot(ax=ax, vmin=-1, vmax=5)\n",
     "    ax.set_title(k)"
    ]
   },
@@ -5341,7 +5360,13 @@
     }
    ],
    "source": [
-    "from xmip.preprocessing import correct_coordinates,parse_lon_lat_bounds, maybe_convert_bounds_to_vertex, maybe_convert_vertex_to_bounds\n",
+    "from xmip.preprocessing import (\n",
+    "    correct_coordinates,\n",
+    "    parse_lon_lat_bounds,\n",
+    "    maybe_convert_bounds_to_vertex,\n",
+    "    maybe_convert_vertex_to_bounds,\n",
+    ")\n",
+    "\n",
     "\n",
     "# same as above\n",
     "def wrapper(ds):\n",
@@ -5357,10 +5382,10 @@
     "    ds = maybe_convert_vertex_to_bounds(ds)\n",
     "    return ds\n",
     "\n",
+    "\n",
     "# pass the preprocessing directly\n",
     "\n",
-    "dset_dict_processed3 = cat.to_dataset_dict(zarr_kwargs=z_kwargs,\n",
-    "                                           preprocess=wrapper)"
+    "dset_dict_processed3 = cat.to_dataset_dict(zarr_kwargs=z_kwargs, preprocess=wrapper)"
    ]
   },
   {
@@ -5599,29 +5624,32 @@
     "from xmip.preprocessing import combined_preprocessing\n",
     "\n",
     "# lets load a bunch more models this time\n",
-    "query = dict(experiment_id=['piControl', 'historical'],\n",
-    "             table_id='Oyr', \n",
-    "             source_id=[\n",
-    "                 'GFDL-ESM4',\n",
-    "                 'IPSL-CM6A-LR',\n",
-    "                 'CanESM5',\n",
-    "                 'CanESM5-CanOE',\n",
-    "                 'MPI-ESM-1-2-HAM',\n",
-    "                 'MPI-ESM1-2-HR',\n",
-    "                 'MPI-ESM1-2-LR',\n",
-    "                 'ACCESS-ESM1-5',\n",
-    "                 'MRI-ESM2-0',\n",
-    "                 'IPSL-CM5A2-INCA',\n",
-    "                 'EC-Earth3-CC'\n",
-    "             ],\n",
-    "             variable_id='o2',\n",
-    "             grid_label=['gn', 'gr'])\n",
+    "query = dict(\n",
+    "    experiment_id=[\"piControl\", \"historical\"],\n",
+    "    table_id=\"Oyr\",\n",
+    "    source_id=[\n",
+    "        \"GFDL-ESM4\",\n",
+    "        \"IPSL-CM6A-LR\",\n",
+    "        \"CanESM5\",\n",
+    "        \"CanESM5-CanOE\",\n",
+    "        \"MPI-ESM-1-2-HAM\",\n",
+    "        \"MPI-ESM1-2-HR\",\n",
+    "        \"MPI-ESM1-2-LR\",\n",
+    "        \"ACCESS-ESM1-5\",\n",
+    "        \"MRI-ESM2-0\",\n",
+    "        \"IPSL-CM5A2-INCA\",\n",
+    "        \"EC-Earth3-CC\",\n",
+    "    ],\n",
+    "    variable_id=\"o2\",\n",
+    "    grid_label=[\"gn\", \"gr\"],\n",
+    ")\n",
     "cat = col.search(**query)\n",
     "\n",
-    "print(cat.df['source_id'].unique())\n",
-    "with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n",
-    "    dset_dict = cat.to_dataset_dict(zarr_kwargs=z_kwargs,\n",
-    "                                    preprocess=combined_preprocessing)"
+    "print(cat.df[\"source_id\"].unique())\n",
+    "with dask.config.set(**{\"array.slicing.split_large_chunks\": True}):\n",
+    "    dset_dict = cat.to_dataset_dict(\n",
+    "        zarr_kwargs=z_kwargs, preprocess=combined_preprocessing\n",
+    "    )"
    ]
   },
   {
@@ -5643,15 +5671,15 @@
     }
    ],
    "source": [
-    "fig, axarr = plt.subplots(nrows=4, ncols=3, figsize=[25,15])\n",
-    "for ax,(k, ds) in zip(axarr.flat,dset_dict.items()):\n",
-    "    if 'member_id' in ds.dims:\n",
+    "fig, axarr = plt.subplots(nrows=4, ncols=3, figsize=[25, 15])\n",
+    "for ax, (k, ds) in zip(axarr.flat, dset_dict.items()):\n",
+    "    if \"member_id\" in ds.dims:\n",
     "        ds = ds.isel(member_id=0)\n",
     "    da = ds.o2.isel(time=0).interp(lev=2500)\n",
-    "    # this step is necessary to order the longitudes properly for simple plotting. Alternatively you could use a proper map projection \n",
+    "    # this step is necessary to order the longitudes properly for simple plotting. Alternatively, you could use a proper map projection\n",
     "    # with e.g. cartopy and would not need this step\n",
     "    da = replace_x_y_nominal_lat_lon(da)\n",
-    "    da = da.sel(x=slice(100, 200), y=slice(-20,20))\n",
+    "    da = da.sel(x=slice(100, 200), y=slice(-20, 20))\n",
     "    try:\n",
     "        da.plot(ax=ax, vmax=0.25, vmin=0.05)\n",
     "    except:\n",
diff --git a/notebooks/add_more_models.ipynb b/notebooks/add_more_models.ipynb
index a4eda4b6..cb045978 100644
--- a/notebooks/add_more_models.ipynb
+++ b/notebooks/add_more_models.ipynb
@@ -34,8 +34,6 @@
     "%load_ext autoreload\n",
     "%autoreload 2\n",
     "import intake\n",
-    "import pandas as pd\n",
-    "import xarray as xr\n",
     "from xmip.preprocessing import cmip6_renaming_dict"
    ]
   },
@@ -183,11 +181,11 @@
     "# Grab all available ocean output.\n",
     "url = \"https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json\"\n",
     "col = intake.open_esm_datastore(url)\n",
-    "query = dict(table_id=['Omon', 'Oyr']) # pick all available ocean fields for now\n",
+    "query = dict(table_id=[\"Omon\", \"Oyr\"])  # pick all available ocean fields for now\n",
     "cat = col.search(**query)\n",
     "\n",
     "# find unique source_ids\n",
-    "available_models = cat.df['source_id'].unique()\n",
+    "available_models = cat.df[\"source_id\"].unique()\n",
     "# find available models in xmip\n",
     "models = [k for k in cmip6_renaming_dict().keys()]\n",
     "# find missing models\n",
@@ -211,29 +209,47 @@
     }
    ],
    "source": [
-    "all_variables = cat.df['variable_id'].unique()\n",
-    "surface_variables = ['tos', 'chl', 'zos', 'chlos', 'fgco2', 'hfds', 'sos',\n",
-    "                     'mlotst', 'tauuo', 'tauvo', 'msftmz', 'intpp']\n",
+    "all_variables = cat.df[\"variable_id\"].unique()\n",
+    "surface_variables = [\n",
+    "    \"tos\",\n",
+    "    \"chl\",\n",
+    "    \"zos\",\n",
+    "    \"chlos\",\n",
+    "    \"fgco2\",\n",
+    "    \"hfds\",\n",
+    "    \"sos\",\n",
+    "    \"mlotst\",\n",
+    "    \"tauuo\",\n",
+    "    \"tauvo\",\n",
+    "    \"msftmz\",\n",
+    "    \"intpp\",\n",
+    "]\n",
     "remaining_variables = [v for v in all_variables if v not in surface_variables]\n",
-    "# some models literally have only surface input. Ill deal with that later. For now lets look at the ones that have 3d \n",
+    "# some models literally have only surface input. I'll deal with that later. For now let's look at the ones that have 3d\n",
     "# output (if surface values show up below, add them to the list above.)\n",
     "# now load one dataset for each missing model\n",
-    "query = dict(table_id=['Omon', 'Oyr'], source_id=missing_models, variable_id=remaining_variables) # pick all available ocean fields for now\n",
+    "query = dict(\n",
+    "    table_id=[\"Omon\", \"Oyr\"], source_id=missing_models, variable_id=remaining_variables\n",
+    ")  # pick all available ocean fields for now\n",
     "cat_sub = col.search(**query)\n",
-    "cat_sub.df = cat_sub.df.drop_duplicates(subset='source_id')\n",
-    "surface_only_models = [m for m in missing_models if m not in cat_sub.df['source_id'].unique()]\n",
+    "cat_sub.df = cat_sub.df.drop_duplicates(subset=\"source_id\")\n",
+    "surface_only_models = [\n",
+    "    m for m in missing_models if m not in cat_sub.df[\"source_id\"].unique()\n",
+    "]\n",
     "if len(cat_sub.df) > 0:\n",
-    "    dset_dict = cat_sub.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':False})\n",
+    "    dset_dict = cat_sub.to_dataset_dict(\n",
+    "        zarr_kwargs={\"consolidated\": True, \"decode_times\": False}\n",
+    "    )\n",
     "    for k, ds in dset_dict.items():\n",
-    "        print('=========================================')\n",
+    "        print(\"=========================================\")\n",
     "        print(k)\n",
-    "        print('=========================================')\n",
+    "        print(\"=========================================\")\n",
     "        print(ds)\n",
-    "        da = ds[ds.attrs['variable_id']]\n",
+    "        da = ds[ds.attrs[\"variable_id\"]]\n",
     "        if len(da.dims) < 5:\n",
-    "            print('!!! This is a surface field!!!')\n",
+    "            print(\"!!! This is a surface field!!!\")\n",
     "else:\n",
-    "    print('Nice. All models with 3d ocean fields are catalogued')"
+    "    print(\"Nice. All models with 3d ocean fields are catalogued\")"
    ]
   },
   {
@@ -257,20 +273,26 @@
     }
    ],
    "source": [
-    "query = dict(table_id=['Omon', 'Oyr'], source_id=surface_only_models, variable_id=surface_variables)\n",
+    "query = dict(\n",
+    "    table_id=[\"Omon\", \"Oyr\"],\n",
+    "    source_id=surface_only_models,\n",
+    "    variable_id=surface_variables,\n",
+    ")\n",
     "cat_surf = col.search(**query)\n",
-    "cat_surf.df = cat_surf.df.drop_duplicates(subset='source_id')\n",
+    "cat_surf.df = cat_surf.df.drop_duplicates(subset=\"source_id\")\n",
     "\n",
     "if len(cat_sub.df) > 0:\n",
-    "    dset_dict = cat_surf.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':False})\n",
+    "    dset_dict = cat_surf.to_dataset_dict(\n",
+    "        zarr_kwargs={\"consolidated\": True, \"decode_times\": False}\n",
+    "    )\n",
     "    for k, ds in dset_dict.items():\n",
-    "        print('=========================================')\n",
+    "        print(\"=========================================\")\n",
     "        print(k)\n",
-    "        print('=========================================')\n",
+    "        print(\"=========================================\")\n",
     "        print(ds)\n",
-    "        da = ds[ds.attrs['variable_id']]\n",
+    "        da = ds[ds.attrs[\"variable_id\"]]\n",
     "else:\n",
-    "    print('Nice. All ocean models are catalogued')"
+    "    print(\"Nice. All ocean models are catalogued\")"
    ]
   },
   {
@@ -301,7 +323,7 @@
     }
    ],
    "source": [
-    "col.df['table_id'].unique()"
+    "col.df[\"table_id\"].unique()"
    ]
   },
   {
@@ -346,9 +368,9 @@
     }
    ],
    "source": [
-    "lst = ['a', 'b']\n",
+    "lst = [\"a\", \"b\"]\n",
     "\n",
-    "'a' in lst"
+    "\"a\" in lst"
    ]
   },
   {
@@ -1356,19 +1378,21 @@
     }
    ],
    "source": [
-    "query = dict(table_id='6hrLev')\n",
+    "query = dict(table_id=\"6hrLev\")\n",
     "cat_atmos = col.search(**query)\n",
-    "cat_atmos.df = cat_atmos.df.drop_duplicates(subset='source_id')\n",
+    "cat_atmos.df = cat_atmos.df.drop_duplicates(subset=\"source_id\")\n",
     "print(cat_atmos.df)\n",
     "print(len(cat_atmos.df))\n",
     "if len(cat_atmos.df) > 0:\n",
-    "    dset_dict = cat_atmos.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':False})\n",
+    "    dset_dict = cat_atmos.to_dataset_dict(\n",
+    "        zarr_kwargs={\"consolidated\": True, \"decode_times\": False}\n",
+    "    )\n",
     "    for k, ds in dset_dict.items():\n",
-    "        print('=========================================')\n",
+    "        print(\"=========================================\")\n",
     "        print(k)\n",
-    "        print('=========================================')\n",
+    "        print(\"=========================================\")\n",
     "        print(ds)\n",
-    "        da = ds[ds.attrs['variable_id']]\n",
+    "        da = ds[ds.attrs[\"variable_id\"]]\n",
     "# else:\n",
     "#     print('Nice. All ocean models are catalogued')"
    ]
@@ -1386,9 +1410,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "query = dict(table_id=['Omon', 'Oyr']) # pick all available ocean fields for now\n",
+    "query = dict(table_id=[\"Omon\", \"Oyr\"])  # pick all available ocean fields for now\n",
     "cat = col.search(**query)\n",
-    "cat.df = cat.df.drop_duplicates(subset='source_id')"
+    "cat.df = cat.df.drop_duplicates(subset=\"source_id\")"
    ]
   },
   {
@@ -1398,6 +1422,7 @@
    "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
+    "\n",
     "%matplotlib inline"
    ]
   },
@@ -3709,27 +3734,25 @@
     "# be fixed\n",
     "\n",
     "# I should also independently test for gn and gr variables\n",
-    "test_vars = [v for v in all_variables if v not in ['msftmz']]\n",
+    "test_vars = [v for v in all_variables if v not in [\"msftmz\"]]\n",
     "\n",
     "for model in models:\n",
-    "    if 'AWI' not in model:\n",
+    "    if \"AWI\" not in model:\n",
     "        print(model)\n",
-    "        cat_check = col.search(source_id=model,\n",
-    "                               variable_id=test_vars,\n",
-    "                               **query)\n",
-    "        cat_check.df = cat_check.df.drop_duplicates(subset='source_id')\n",
+    "        cat_check = col.search(source_id=model, variable_id=test_vars, **query)\n",
+    "        cat_check.df = cat_check.df.drop_duplicates(subset=\"source_id\")\n",
     "        check = cat_check.to_dataset_dict(\n",
-    "            zarr_kwargs={'consolidated': True, 'decode_times':False},\n",
-    "            preprocess=combined_preprocessing\n",
+    "            zarr_kwargs={\"consolidated\": True, \"decode_times\": False},\n",
+    "            preprocess=combined_preprocessing,\n",
     "        )\n",
     "        ds = check[list(check.keys())[0]]\n",
-    "        var = ds.attrs['variable_id']\n",
+    "        var = ds.attrs[\"variable_id\"]\n",
     "        da = ds[var]\n",
     "        print(da)\n",
-    "        for di in ['time', 'member_id', 'lev', 'rho']:\n",
+    "        for di in [\"time\", \"member_id\", \"lev\", \"rho\"]:\n",
     "            if di in da.dims:\n",
-    "                da = da.isel({di:0})\n",
-    "                \n",
+    "                da = da.isel({di: 0})\n",
+    "\n",
     "        plt.figure()\n",
     "        da.plot(robust=True)\n",
     "        plt.show()"
diff --git a/notebooks/maintenance_grids.ipynb b/notebooks/maintenance_grids.ipynb
index 931c0f2e..2b96c2e0 100644
--- a/notebooks/maintenance_grids.ipynb
+++ b/notebooks/maintenance_grids.ipynb
@@ -41,6 +41,7 @@
    ],
    "source": [
     "import xmip\n",
+    "\n",
     "xmip.__version__"
    ]
   },
@@ -78,6 +79,7 @@
     "# col = intake.open_esm_datastore(url)\n",
     "\n",
     "from xmip.utils import google_cmip_col\n",
+    "\n",
     "col = google_cmip_col()"
    ]
   },
@@ -251,50 +253,51 @@
     "    \"\"\"Show which source_id/grid_label combos have any data, and return a df that picks only one dataset for each combo\"\"\"\n",
     "    url = \"https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json\"\n",
     "    col = intake.open_esm_datastore(url)\n",
-    "    query = dict(table_id=['Omon', 'Oyr'])\n",
+    "    query = dict(table_id=[\"Omon\", \"Oyr\"])\n",
     "    if variables is not None:\n",
-    "        query['variable_id'] = variables\n",
-    "        \n",
-    "    \n",
+    "        query[\"variable_id\"] = variables\n",
+    "\n",
     "    # pick all available ocean fields for now\n",
     "    cat = col.search(**query)\n",
     "    print(cat)\n",
-    "    \n",
+    "\n",
     "    available = []\n",
     "    dataframes = []\n",
     "    df = cat.df.copy()\n",
-    "    groups = df.groupby(['source_id', 'grid_label'])\n",
+    "    groups = df.groupby([\"source_id\", \"grid_label\"])\n",
     "    for group in groups:\n",
-    "        \n",
     "        # add source_id/grid_label combo to list\n",
-    "        label = '.'.join(group[0])\n",
+    "        label = \".\".join(group[0])\n",
     "        # pick only the first index of each group\n",
-    "        line = group[1].iloc[0,:]\n",
-    "        \n",
+    "        line = group[1].iloc[0, :]\n",
+    "\n",
     "        available.append(label)\n",
     "        dataframes.append(line)\n",
     "\n",
     "    new_df = pd.concat(dataframes, axis=1).transpose()\n",
     "    cat.df = new_df\n",
-    "    \n",
+    "\n",
     "    return cat, available\n",
     "\n",
+    "\n",
     "_, all_models = available_output()\n",
     "print(len(all_models))\n",
     "\n",
-    "cat_tracer, tracer_models = available_output(['tos', 'thetao'])\n",
+    "cat_tracer, tracer_models = available_output([\"tos\", \"thetao\"])\n",
     "missing_tracer_models = set(tracer_models).symmetric_difference(set(all_models))\n",
     "print(f\"Did not find tracer data for these models:{missing_tracer_models}\\n\")\n",
     "\n",
-    "cat_u, u_models = available_output(['uo'])\n",
+    "cat_u, u_models = available_output([\"uo\"])\n",
     "missing_u_models = set(u_models).symmetric_difference(set(all_models))\n",
     "print(f\"Did not find u data for these models:{missing_u_models}\\n\")\n",
     "\n",
-    "cat_v, v_models = available_output(['vo'])\n",
+    "cat_v, v_models = available_output([\"vo\"])\n",
     "missing_v_models = set(v_models).symmetric_difference(set(all_models))\n",
     "print(f\"Did not find v data for these models:{missing_v_models}\\n\")\n",
     "\n",
-    "print(f\"Any models that have only u or v:{set(v_models).symmetric_difference(set(u_models))}\")"
+    "print(\n",
+    "    f\"Any models that have only u or v:{set(v_models).symmetric_difference(set(u_models))}\"\n",
+    ")"
    ]
   },
   {
@@ -2263,13 +2266,17 @@
    "source": [
     "# for now load them manually\n",
     "import fsspec\n",
-    "import xarray as xr\n",
+    "\n",
     "super_dict = {}\n",
-    "for var, cat in zip(['thetao', 'uo', 'vo'],[cat_tracer, cat_u, cat_v]):\n",
-    "    super_dict[var]={}\n",
-    "    for ri,(rr,row) in enumerate(cat.df.iterrows()):\n",
-    "#         print(ri)\n",
-    "        ds = combined_preprocessing(xr.open_zarr(fsspec.get_mapper(row['zstore']), consolidated=True, decode_times=False))\n",
+    "for var, cat in zip([\"thetao\", \"uo\", \"vo\"], [cat_tracer, cat_u, cat_v]):\n",
+    "    super_dict[var] = {}\n",
+    "    for ri, (rr, row) in enumerate(cat.df.iterrows()):\n",
+    "        #         print(ri)\n",
+    "        ds = combined_preprocessing(\n",
+    "            xr.open_zarr(\n",
+    "                fsspec.get_mapper(row[\"zstore\"]), consolidated=True, decode_times=False\n",
+    "            )\n",
+    "        )\n",
     "        label = f\"{row['source_id']}.{row['grid_label']}\"\n",
     "        super_dict[var][label] = ds"
    ]
@@ -2417,71 +2424,72 @@
    ],
    "source": [
     "staggered_grid_dict = {}\n",
-    "for k in super_dict['thetao'].keys():\n",
-    "    ds_ref = super_dict['thetao'][k]\n",
-    "    s_id = ds_ref.attrs['source_id']\n",
-    "    g_la = ds_ref.attrs['grid_label']\n",
-    "    \n",
-    "    if not ('AWI' in k and 'gn' in k):\n",
+    "for k in super_dict[\"thetao\"].keys():\n",
+    "    ds_ref = super_dict[\"thetao\"][k]\n",
+    "    s_id = ds_ref.attrs[\"source_id\"]\n",
+    "    g_la = ds_ref.attrs[\"grid_label\"]\n",
+    "\n",
+    "    if not (\"AWI\" in k and \"gn\" in k):\n",
     "        print(f\"############### {k} #######################\")\n",
-    "        if k in super_dict['uo'].keys() and k in super_dict['vo'].keys():\n",
-    "            \n",
-    "            ds_u = super_dict['uo'][k]\n",
-    "            ds_v = super_dict['vo'][k]\n",
-    "            \n",
-    "            if 'x' not in ds_ref.dims:\n",
-    "                print(f'THIS IS SOME ERROR IN THE PREPROCESSSING. INVESTIGATE {k}')\n",
+    "        if k in super_dict[\"uo\"].keys() and k in super_dict[\"vo\"].keys():\n",
+    "            ds_u = super_dict[\"uo\"][k]\n",
+    "            ds_v = super_dict[\"vo\"][k]\n",
+    "\n",
+    "            if \"x\" not in ds_ref.dims:\n",
+    "                print(f\"THIS IS SOME ERROR IN THE PREPROCESSSING. INVESTIGATE {k}\")\n",
     "                # a nevermind, these are just the AWI ones...remove them earlier...\n",
     "            else:\n",
-    "                x_shift_u = detect_shift(ds_ref, ds_u, 'X')\n",
-    "                y_shift_u = detect_shift(ds_ref, ds_u, 'Y')\n",
+    "                x_shift_u = detect_shift(ds_ref, ds_u, \"X\")\n",
+    "                y_shift_u = detect_shift(ds_ref, ds_u, \"Y\")\n",
     "\n",
-    "                x_shift_v = detect_shift(ds_ref, ds_v, 'X')\n",
-    "                y_shift_v = detect_shift(ds_ref, ds_v, 'Y')\n",
-    "                \n",
-    "                \n",
+    "                x_shift_v = detect_shift(ds_ref, ds_v, \"X\")\n",
+    "                y_shift_v = detect_shift(ds_ref, ds_v, \"Y\")\n",
     "\n",
     "                # check that there is only one left after removing 'center'\n",
-    "                x_shift = set([x_shift_u, x_shift_v]) - set(['center'])\n",
-    "                y_shift = set([y_shift_u, y_shift_v]) - set(['center'])\n",
+    "                x_shift = set([x_shift_u, x_shift_v]) - set([\"center\"])\n",
+    "                y_shift = set([y_shift_u, y_shift_v]) - set([\"center\"])\n",
     "                # if they are all on center default to left\n",
     "\n",
     "                if len(x_shift) == 0:\n",
-    "                    x_shift = 'left'\n",
+    "                    x_shift = \"left\"\n",
     "                elif len(x_shift) == 1:\n",
     "                    x_shift = list(x_shift)[0]\n",
     "                else:\n",
-    "                    print('SCHEISSE X')\n",
-    "                    print('x')\n",
+    "                    print(\"SCHEISSE X\")\n",
+    "                    print(\"x\")\n",
     "                    print(x_shift_u)\n",
     "                    print(x_shift_v)\n",
-    "                    print('y')\n",
+    "                    print(\"y\")\n",
     "                    print(y_shift_u)\n",
     "                    print(y_shift_v)\n",
-    "                    x_shift=None\n",
+    "                    x_shift = None\n",
     "\n",
     "                if len(y_shift) == 0:\n",
-    "                    y_shift = 'left'\n",
+    "                    y_shift = \"left\"\n",
     "                elif len(y_shift) == 1:\n",
     "                    y_shift = list(y_shift)[0]\n",
     "                else:\n",
-    "                    print('SCHEISSE Y')\n",
-    "                    print('u')\n",
+    "                    print(\"SCHEISSE Y\")\n",
+    "                    print(\"u\")\n",
     "                    print(x_shift_u)\n",
     "                    print(y_shift_u)\n",
-    "                    print('v')\n",
+    "                    print(\"v\")\n",
     "                    print(x_shift_v)\n",
     "                    print(y_shift_v)\n",
     "                    y_shift = None\n",
     "        else:\n",
-    "            print(f\"ATTENTION: Setting shift to left for {k}, since there is no velocity data\")\n",
-    "            x_shift = 'left'\n",
-    "            y_shift = 'left'\n",
-    "            \n",
+    "            print(\n",
+    "                f\"ATTENTION: Setting shift to left for {k}, since there is no velocity data\"\n",
+    "            )\n",
+    "            x_shift = \"left\"\n",
+    "            y_shift = \"left\"\n",
+    "\n",
     "        if x_shift is not None and y_shift is not None:\n",
-    "            if not s_id in staggered_grid_dict.keys():\n",
+    "            if s_id not in staggered_grid_dict.keys():\n",
     "                staggered_grid_dict[s_id] = {}\n",
-    "            staggered_grid_dict[s_id][g_la] = {'axis_shift':{'X': x_shift, 'Y': y_shift}}"
+    "            staggered_grid_dict[s_id][g_la] = {\n",
+    "                \"axis_shift\": {\"X\": x_shift, \"Y\": y_shift}\n",
+    "            }"
    ]
   },
   {
@@ -2498,7 +2506,8 @@
    "outputs": [],
    "source": [
     "import yaml\n",
-    "ff = open('/home/jovyan/xmip/xmip/specs/staggered_grid_config.yaml', \"r\")\n",
+    "\n",
+    "ff = open(\"/home/jovyan/xmip/xmip/specs/staggered_grid_config.yaml\", \"r\")\n",
     "grid_dict = yaml.safe_load(ff)\n",
     "ff.close()"
    ]
@@ -2519,7 +2528,9 @@
    ],
    "source": [
     "# any keys in the old dict that are not in the new one?\n",
-    "print(f\"Keys in the old grid, which are not in the new one {set(grid_dict.keys())- set(staggered_grid_dict.keys())}\")\n",
+    "print(\n",
+    "    f\"Keys in the old grid, which are not in the new one {set(grid_dict.keys())- set(staggered_grid_dict.keys())}\"\n",
+    ")\n",
     "\n",
     "print(f\"Newly added models {set(staggered_grid_dict.keys()) - set(grid_dict.keys())}\")"
    ]
@@ -2542,7 +2553,7 @@
     }
    ],
    "source": [
-    "staggered_grid_dict['GFDL-CM4']"
+    "staggered_grid_dict[\"GFDL-CM4\"]"
    ]
   },
   {
@@ -2551,7 +2562,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with open('test.yaml', 'w') as file:\n",
+    "with open(\"test.yaml\", \"w\") as file:\n",
     "    documents = yaml.dump(staggered_grid_dict, file)"
    ]
   }
diff --git a/notebooks/metric_parse_improvement.ipynb b/notebooks/metric_parse_improvement.ipynb
index 481c4671..95af45c0 100644
--- a/notebooks/metric_parse_improvement.ipynb
+++ b/notebooks/metric_parse_improvement.ipynb
@@ -482,27 +482,36 @@
    ],
    "source": [
     "import pandas as pd\n",
-    "# get all available ocean models from the cloud. \n",
-    "url = 'https://storage.googleapis.com/cmip6/pangeo-cmip6.csv'\n",
+    "\n",
+    "# get all available ocean models from the cloud.\n",
+    "url = \"https://storage.googleapis.com/cmip6/pangeo-cmip6.csv\"\n",
     "df = pd.read_csv(url)\n",
-    "df_ocean =df[(df.table_id=='Omon') + (df.table_id=='Oyr')]\n",
+    "df_ocean = df[(df.table_id == \"Omon\") + (df.table_id == \"Oyr\")]\n",
     "ocean_models = df_ocean.source_id.unique()\n",
     "print(ocean_models)\n",
     "print(len(ocean_models))\n",
     "\n",
-    "exclude_variables = ['mlotst', 'msftmz', 'intpp'] # at the moment cmip6_pp does not like fields that have only one spatial dimension\n",
+    "exclude_variables = [\n",
+    "    \"mlotst\",\n",
+    "    \"msftmz\",\n",
+    "    \"intpp\",\n",
+    "]  # at the moment cmip6_pp does not like fields that have only one spatial dimension\n",
     "\n",
     "\n",
     "# get one dataset from each model, shouldnt matter which\n",
-    "col = intake.open_esm_datastore(\"https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json\")\n",
+    "col = intake.open_esm_datastore(\n",
+    "    \"https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json\"\n",
+    ")\n",
     "# cat = col.search(source_id=ocean_models)\n",
-    "cat = col.search(table_id=['Omon', 'Oyr'])\n",
-    "exclude_index = [vid not in exclude_variables for vid in cat.df['variable_id']]\n",
+    "cat = col.search(table_id=[\"Omon\", \"Oyr\"])\n",
+    "exclude_index = [vid not in exclude_variables for vid in cat.df[\"variable_id\"]]\n",
     "cat.df = cat.df[exclude_index]\n",
-    "cat.df = cat.df.drop_duplicates(subset='source_id')\n",
+    "cat.df = cat.df.drop_duplicates(subset=\"source_id\")\n",
     "print(len(cat.df))\n",
-    "data_dict_raw = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':False},\n",
-    "                                preprocess=combined_preprocessing)\n",
+    "data_dict_raw = cat.to_dataset_dict(\n",
+    "    zarr_kwargs={\"consolidated\": True, \"decode_times\": False},\n",
+    "    preprocess=combined_preprocessing,\n",
+    ")\n",
     "data_dict = parse_metrics(data_dict_raw, col, preprocess=combined_preprocessing)"
    ]
   },
@@ -554,7 +563,14 @@
    ],
    "source": [
     "import matplotlib.pyplot as plt\n",
-    "fig, axarr = plt.subplots(ncols=6, nrows=len(data_dict.keys())//6 +1, figsize=[30,30], sharex=True, sharey=True)\n",
+    "\n",
+    "fig, axarr = plt.subplots(\n",
+    "    ncols=6,\n",
+    "    nrows=len(data_dict.keys()) // 6 + 1,\n",
+    "    figsize=[30, 30],\n",
+    "    sharex=True,\n",
+    "    sharey=True,\n",
+    ")\n",
     "for ax, (k, ds) in zip(axarr.flat, data_dict.items()):\n",
     "    ds.areacello.plot(ax=ax, add_labels=False)\n",
     "    ax.set_title(k)"
diff --git a/notebooks/parse_area_gn.ipynb b/notebooks/parse_area_gn.ipynb
index dfe1e931..99e1bf62 100644
--- a/notebooks/parse_area_gn.ipynb
+++ b/notebooks/parse_area_gn.ipynb
@@ -22,8 +22,7 @@
     }
    ],
    "source": [
-    "import intake\n",
-    "import numpy as np"
+    "import intake"
    ]
   },
   {
@@ -174,8 +173,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "query = dict(experiment_id='piControl',\n",
-    "                     variable_id=['thetao', 'uo', 'vo'],table_id='Omon', grid_label='gn')"
+    "query = dict(\n",
+    "    experiment_id=\"piControl\",\n",
+    "    variable_id=[\"thetao\", \"uo\", \"vo\"],\n",
+    "    table_id=\"Omon\",\n",
+    "    grid_label=\"gn\",\n",
+    ")"
    ]
   },
   {
@@ -311,7 +314,10 @@
    "source": [
     "# load the same thing with preprocessing\n",
     "from xmip.preprocessing import read_data\n",
-    "with warnings.catch_warnings(): # these lines just make sure that the warnings dont clutter your notebook\n",
+    "\n",
+    "with (\n",
+    "    warnings.catch_warnings()\n",
+    "):  # these lines just make sure that the warnings don't clutter your notebook\n",
     "    warnings.simplefilter(\"ignore\")\n",
     "    data_dict = read_data(col, **query)"
    ]
@@ -331,7 +337,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "parse_metrics(data_dict, col, rename=True) #rename is important to get the consistent naming!"
+    "parse_metrics(\n",
+    "    data_dict, col, rename=True\n",
+    ")  # rename is important to get the consistent naming!"
    ]
   },
   {
diff --git a/notebooks/testing_various_issues.ipynb b/notebooks/testing_various_issues.ipynb
index 9f99747e..f2cabb3b 100644
--- a/notebooks/testing_various_issues.ipynb
+++ b/notebooks/testing_various_issues.ipynb
@@ -26,19 +26,16 @@
    "source": [
     "import xarray as xr\n",
     "import matplotlib.pyplot as plt\n",
-    "import pandas as pd\n",
     "import numpy as np\n",
     "import intake\n",
     "from xmip.preprocessing import combined_preprocessing\n",
-    "from dask.diagnostics import ProgressBar\n",
-    "import warnings\n",
     "\n",
     "%load_ext autoreload\n",
     "%autoreload 2\n",
     "%matplotlib inline\n",
-    "plt.rcParams['figure.figsize'] = 12, 6\n",
+    "plt.rcParams[\"figure.figsize\"] = 12, 6\n",
     "%config InlineBackend.figure_format = 'retina'\n",
-    "xr.set_options(cmap_sequential='cividis', display_style='html')"
+    "xr.set_options(cmap_sequential=\"cividis\", display_style=\"html\")"
    ]
   },
   {
@@ -47,7 +44,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "col = intake.open_esm_datastore(\"https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json\")"
+    "col = intake.open_esm_datastore(\n",
+    "    \"https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json\"\n",
+    ")"
    ]
   },
   {
@@ -110,15 +109,18 @@
     }
    ],
    "source": [
-    "query = dict(variable_id=['thetao'],\n",
-    "             experiment_id = 'historical',\n",
-    "             table_id=['Omon'], \n",
-    "             source_id=['CanESM5'],\n",
-    "             grid_label=['gn'],\n",
-    "            )\n",
+    "query = dict(\n",
+    "    variable_id=[\"thetao\"],\n",
+    "    experiment_id=\"historical\",\n",
+    "    table_id=[\"Omon\"],\n",
+    "    source_id=[\"CanESM5\"],\n",
+    "    grid_label=[\"gn\"],\n",
+    ")\n",
     "cat = col.search(**query)\n",
     "cat.df\n",
-    "data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':False}, aggregate=False)\n",
+    "data_dict = cat.to_dataset_dict(\n",
+    "    zarr_kwargs={\"consolidated\": True, \"decode_times\": False}, aggregate=False\n",
+    ")\n",
     "for k, ds in data_dict.items():\n",
     "    print(f\"{k}: {ds.lev_bnds.dims}\")"
    ]
@@ -596,7 +598,7 @@
     }
    ],
    "source": [
-    "ds.lev_bnds.isel(time=0).drop_vars(['time'])"
+    "ds.lev_bnds.isel(time=0).drop_vars([\"time\"])"
    ]
   },
   {
@@ -606,7 +608,7 @@
    "outputs": [],
    "source": [
     "# see if the values are unique with regard to time\n",
-    "ds = data_dict['CMIP.CCCma.CanESM5.historical.r10i1p1f1.Omon.thetao.gn']\n",
+    "ds = data_dict[\"CMIP.CCCma.CanESM5.historical.r10i1p1f1.Omon.thetao.gn\"]\n",
     "np.testing.assert_allclose(ds.lev_bnds.isel(time=0).data, ds.lev_bnds.isel(time=1).data)"
    ]
   },
@@ -635,7 +637,7 @@
    "source": [
     "# query = dict(variable_id=['o2'],\n",
     "# #              experiment_id=['piControl'],\n",
-    "#              table_id=['Omon'], \n",
+    "#              table_id=['Omon'],\n",
     "#              source_id=['GFDL-CM4'],\n",
     "# #              grid_label=['gn']\n",
     "#             )\n",
@@ -657,7 +659,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# this should go to "
+    "# this should go to"
    ]
   },
   {
@@ -723,12 +725,16 @@
     }
    ],
    "source": [
-    "query = dict(variable_id=['so'],\n",
-    "             experiment_id=['historical'],\n",
-    "             table_id=['Omon'], \n",
-    "             source_id=['ACCESS-ESM1-5'])\n",
+    "query = dict(\n",
+    "    variable_id=[\"so\"],\n",
+    "    experiment_id=[\"historical\"],\n",
+    "    table_id=[\"Omon\"],\n",
+    "    source_id=[\"ACCESS-ESM1-5\"],\n",
+    ")\n",
     "cat = col.search(**query)\n",
-    "data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':True})"
+    "data_dict = cat.to_dataset_dict(\n",
+    "    zarr_kwargs={\"consolidated\": True, \"decode_times\": True}\n",
+    ")"
    ]
   },
   {
@@ -777,17 +783,20 @@
     }
    ],
    "source": [
-    "\n",
-    "experiment_ids = ['historical']#,'ssp126',, 'ssp245', 'ssp585'\n",
-    "query = dict(variable_id=['thetao'],#'uo', 'vo', , 'o2'\n",
-    "             experiment_id=experiment_ids,\n",
-    "             table_id=['Omon'])\n",
+    "experiment_ids = [\"historical\"]  # ,'ssp126',, 'ssp245', 'ssp585'\n",
+    "query = dict(\n",
+    "    variable_id=[\"thetao\"],  #'uo', 'vo', , 'o2'\n",
+    "    experiment_id=experiment_ids,\n",
+    "    table_id=[\"Omon\"],\n",
+    ")\n",
     "cat = col.search(**query)\n",
-    "models = cat.df['source_id'].unique()\n",
+    "models = cat.df[\"source_id\"].unique()\n",
     "print(models)\n",
-    "data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':True})\n",
+    "data_dict = cat.to_dataset_dict(\n",
+    "    zarr_kwargs={\"consolidated\": True, \"decode_times\": True}\n",
+    ")\n",
     "# So this fails with the preprocess argument, Ill have to see for which model exactly\n",
-    "for k,ds in data_dict.items():\n",
+    "for k, ds in data_dict.items():\n",
     "    try:\n",
     "        ds_new = combined_preprocessing(ds)\n",
     "    except:\n",
@@ -881,7 +890,10 @@
    ],
    "source": [
     "# Huh this also works. So it seems that the preprocessing messes something up prior to aggregation. Is this due to time?\n",
-    "data_dict_new = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':True}, preprocess=combined_preprocessing)"
+    "data_dict_new = cat.to_dataset_dict(\n",
+    "    zarr_kwargs={\"consolidated\": True, \"decode_times\": True},\n",
+    "    preprocess=combined_preprocessing,\n",
+    ")"
    ]
   },
   {
@@ -2109,12 +2121,15 @@
     "# Checking each model to constrain the models that have problems, odly this works?\n",
     "for model in models:\n",
     "    print(f\"+++++++{model}+++++++++\")\n",
-    "    model_query = {k:v for k,v in query.items()}\n",
-    "    model_query['source_id'] = model\n",
+    "    model_query = {k: v for k, v in query.items()}\n",
+    "    model_query[\"source_id\"] = model\n",
     "    model_cat = col.search(**model_query)\n",
     "    print(model_cat.df)\n",
     "    try:\n",
-    "        model_data_dict = model_cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':False}, preprocess=combined_preprocessing)\n",
+    "        model_data_dict = model_cat.to_dataset_dict(\n",
+    "            zarr_kwargs={\"consolidated\": True, \"decode_times\": False},\n",
+    "            preprocess=combined_preprocessing,\n",
+    "        )\n",
     "    except Exception as e:\n",
     "        print(f\"{k} failed to preprocess\")\n",
     "        print(e)"
@@ -3719,14 +3734,17 @@
     "possible_fuckers = []\n",
     "for model in models:\n",
     "    print(f\"+++++++{model} excluded+++++++++\")\n",
-    "    model_query = {k:v for k,v in query.items()}\n",
-    "    model_query['source_id'] = [m for m in models if m != model]\n",
+    "    model_query = {k: v for k, v in query.items()}\n",
+    "    model_query[\"source_id\"] = [m for m in models if m != model]\n",
     "    model_cat = col.search(**model_query)\n",
     "    try:\n",
-    "        model_data_dict = model_cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':False}, preprocess=combined_preprocessing)\n",
+    "        model_data_dict = model_cat.to_dataset_dict(\n",
+    "            zarr_kwargs={\"consolidated\": True, \"decode_times\": False},\n",
+    "            preprocess=combined_preprocessing,\n",
+    "        )\n",
     "        possible_fuckers.append(model)\n",
     "    except:\n",
-    "        pass\n"
+    "        pass"
    ]
   },
   {
@@ -3816,8 +3834,11 @@
    ],
    "source": [
     "# Huh this also works. So it seems that the preprocessing messes something up prior to aggregation. Is this due to time?\n",
-    "picked_models = [m for m in models if ('CanESM' not in m)]\n",
-    "data_dict_new = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times':True}, preprocess=combined_preprocessing)"
+    "picked_models = [m for m in models if (\"CanESM\" not in m)]\n",
+    "data_dict_new = cat.to_dataset_dict(\n",
+    "    zarr_kwargs={\"consolidated\": True, \"decode_times\": True},\n",
+    "    preprocess=combined_preprocessing,\n",
+    ")"
    ]
   },
   {
@@ -3849,8 +3870,14 @@
    "source": [
     "import xarray as xr\n",
     "import gcsfs\n",
-    "gcs = gcsfs.GCSFileSystem(token='anon')\n",
-    "ds = xr.open_zarr(gcs.get_mapper('gs://cmip6/CMIP/NASA-GISS/GISS-E2-1-G-CC/historical/r1i1p1f1/Omon/dissic/gn/'), consolidated=True)"
+    "\n",
+    "gcs = gcsfs.GCSFileSystem(token=\"anon\")\n",
+    "ds = xr.open_zarr(\n",
+    "    gcs.get_mapper(\n",
+    "        \"gs://cmip6/CMIP/NASA-GISS/GISS-E2-1-G-CC/historical/r1i1p1f1/Omon/dissic/gn/\"\n",
+    "    ),\n",
+    "    consolidated=True,\n",
+    ")"
    ]
   }
  ],