Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Jan 6, 2025
1 parent 191b202 commit 125adff
Show file tree
Hide file tree
Showing 9 changed files with 620 additions and 377 deletions.
148 changes: 92 additions & 56 deletions docs/drift_removal.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
],
"source": [
"from dask_gateway import Gateway\n",
"\n",
"g = Gateway()\n",
"running_clusters = g.list_clusters()\n",
"print(running_clusters)\n",
Expand Down Expand Up @@ -77,6 +78,7 @@
"source": [
"from distributed import Client\n",
"from dask_gateway import GatewayCluster\n",
"\n",
"cluster = GatewayCluster()\n",
"cluster.scale(30)\n",
"cluster"
Expand Down Expand Up @@ -255,18 +257,22 @@
}
],
"source": [
"zkwargs = {'consolidated':True, 'use_cftime':True}\n",
"kwargs = {'zarr_kwargs':zkwargs, 'preprocess':combined_preprocessing, 'aggregate':False}\n",
"zkwargs = {\"consolidated\": True, \"use_cftime\": True}\n",
"kwargs = {\n",
" \"zarr_kwargs\": zkwargs,\n",
" \"preprocess\": combined_preprocessing,\n",
" \"aggregate\": False,\n",
"}\n",
"\n",
"col = google_cmip_col()\n",
"\n",
"\n",
"cat = col.search(source_id='CanESM5-CanOE', variable_id='thetao')\n",
"cat = col.search(source_id=\"CanESM5-CanOE\", variable_id=\"thetao\")\n",
"\n",
"\n",
"ddict_historical = cat.search(experiment_id='historical').to_dataset_dict(**kwargs)\n",
"ddict_ssp585 = cat.search(experiment_id='ssp585').to_dataset_dict(**kwargs)\n",
"ddict_picontrol = cat.search(experiment_id='piControl').to_dataset_dict(**kwargs)"
"ddict_historical = cat.search(experiment_id=\"historical\").to_dataset_dict(**kwargs)\n",
"ddict_ssp585 = cat.search(experiment_id=\"ssp585\").to_dataset_dict(**kwargs)\n",
"ddict_picontrol = cat.search(experiment_id=\"piControl\").to_dataset_dict(**kwargs)"
]
},
{
Expand All @@ -286,12 +292,18 @@
"metadata": {},
"outputs": [],
"source": [
"ds_control = ddict_picontrol['CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
"ds_historical = ddict_historical['CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
"ds_ssp585 = ddict_ssp585['ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
"ds_control = ddict_picontrol[\n",
" \"CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
"]\n",
"ds_historical = ddict_historical[\n",
" \"CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
"]\n",
"ds_ssp585 = ddict_ssp585[\n",
" \"ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
"]\n",
"\n",
"# Pick a random location in x/y/z space to use as an exmple\n",
"roi = {'x':100,'y':220, 'lev':30}"
"roi = {\"x\": 100, \"y\": 220, \"lev\": 30}"
]
},
{
Expand Down Expand Up @@ -335,8 +347,8 @@
],
"source": [
"# ok lets just plot them together\n",
"ds_control.isel(**roi).thetao.plot(color='0.5')\n",
"ds_historical.isel(**roi).thetao.plot(color='C1')"
"ds_control.isel(**roi).thetao.plot(color=\"0.5\")\n",
"ds_historical.isel(**roi).thetao.plot(color=\"C1\")"
]
},
{
Expand Down Expand Up @@ -373,7 +385,7 @@
}
],
"source": [
"{k:v for k,v in ds_historical.attrs.items() if 'parent' in k}"
"{k: v for k, v in ds_historical.attrs.items() if \"parent\" in k}"
]
},
{
Expand Down Expand Up @@ -428,8 +440,8 @@
],
"source": [
"# ok lets just plot them together\n",
"ds_control_adj.isel(**roi).thetao.plot(color='0.5')\n",
"ds_historical_adj.isel(**roi).thetao.plot(color='C1')"
"ds_control_adj.isel(**roi).thetao.plot(color=\"0.5\")\n",
"ds_historical_adj.isel(**roi).thetao.plot(color=\"C1\")"
]
},
{
Expand Down Expand Up @@ -471,8 +483,8 @@
],
"source": [
"# ok lets just plot them together\n",
"ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
"ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
"ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
"ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
]
},
{
Expand All @@ -491,8 +503,9 @@
"outputs": [],
"source": [
"from xmip.drift_removal import replace_time\n",
"\n",
"# with the defaults it will just replace the dates with new ones which have time stamps at the beginning of the month.\n",
"ds_historical_adj = replace_time(ds_historical_adj) "
"ds_historical_adj = replace_time(ds_historical_adj)"
]
},
{
Expand Down Expand Up @@ -526,8 +539,8 @@
],
"source": [
"# ok lets just plot them together again\n",
"ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
"ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
"ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
"ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
]
},
{
Expand Down Expand Up @@ -564,7 +577,7 @@
],
"source": [
"for name, ds in ddict_historical.items():\n",
" print(name, ds.attrs['branch_time_in_parent'])"
" print(name, ds.attrs[\"branch_time_in_parent\"])"
]
},
{
Expand Down Expand Up @@ -598,14 +611,18 @@
"# replace the timestamp with the first of the month for the control run and plot\n",
"# we will also average the data yearly to remove some of the visual noise\n",
"\n",
"plt.figure(figsize=[12,4])\n",
"replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,150*4)).plot(color='0.5')\n",
"plt.figure(figsize=[12, 4])\n",
"replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(\n",
" time=slice(0, 150 * 4)\n",
").plot(color=\"0.5\")\n",
"\n",
"# now we loop through all the historical members, adjust the time and plot them in the same way, \n",
"# now we loop through all the historical members, adjust the time and plot them in the same way,\n",
"# but only for the first 20 years\n",
"for name, ds in ddict_historical.items():\n",
" _, ds_adj = unify_time(ds_control, ds, adjust_to='parent')\n",
" ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,30*4)).plot(color='C1')"
" _, ds_adj = unify_time(ds_control, ds, adjust_to=\"parent\")\n",
" ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0, 30 * 4)).plot(\n",
" color=\"C1\"\n",
" )"
]
},
{
Expand Down Expand Up @@ -654,9 +671,10 @@
"# setting up the scratch bucket\n",
"import os\n",
"import fsspec\n",
"PANGEO_SCRATCH = os.environ['PANGEO_SCRATCH']+'cmip6_pp_demo'\n",
"path = f'{PANGEO_SCRATCH}/test_rechunked.zarr'\n",
"temp_path = f'{PANGEO_SCRATCH}/test_rechunked_temp.zarr'\n",
"\n",
"PANGEO_SCRATCH = os.environ[\"PANGEO_SCRATCH\"] + \"cmip6_pp_demo\"\n",
"path = f\"{PANGEO_SCRATCH}/test_rechunked.zarr\"\n",
"temp_path = f\"{PANGEO_SCRATCH}/test_rechunked_temp.zarr\"\n",
"mapper = fsspec.get_mapper(path)\n",
"mapper_temp = fsspec.get_mapper(temp_path)"
]
Expand Down Expand Up @@ -1506,27 +1524,30 @@
"source": [
"if not mapper.fs.exists(path):\n",
" # recompute the rechunked data into the scratch bucket (is only triggered when the temporary store was erased)\n",
" \n",
"\n",
" # Remove the temp store if for some reason that still exists\n",
" if mapper.fs.exists(temp_path):\n",
" mapper.fs.rm(temp_path, recursive=True)\n",
" from rechunker import rechunk\n",
"\n",
" target_chunks = {\n",
" 'thetao': {'time':6012, 'lev':1, 'x':3, 'y':291},\n",
" 'x': {'x':3},\n",
" 'y': {'y':291},\n",
" 'lat': {'x':3, 'y':291},\n",
" 'lev': {'lev':1},\n",
" 'lon': {'x':3, 'y':291},\n",
" 'time': {'time':6012}, \n",
" \"thetao\": {\"time\": 6012, \"lev\": 1, \"x\": 3, \"y\": 291},\n",
" \"x\": {\"x\": 3},\n",
" \"y\": {\"y\": 291},\n",
" \"lat\": {\"x\": 3, \"y\": 291},\n",
" \"lev\": {\"lev\": 1},\n",
" \"lon\": {\"x\": 3, \"y\": 291},\n",
" \"time\": {\"time\": 6012},\n",
" }\n",
" max_mem = '1GB'\n",
" max_mem = \"1GB\"\n",
"\n",
" array_plan = rechunk(ds_control[['thetao']], target_chunks, max_mem, mapper, temp_store=mapper_temp)\n",
" array_plan = rechunk(\n",
" ds_control[[\"thetao\"]], target_chunks, max_mem, mapper, temp_store=mapper_temp\n",
" )\n",
" array_plan.execute(retries=10)\n",
" \n",
"\n",
"ds_control_rechunked = xr.open_zarr(mapper, use_cftime=True)\n",
"ds_control_rechunked "
"ds_control_rechunked"
]
},
{
Expand Down Expand Up @@ -2061,8 +2082,8 @@
}
],
"source": [
"drift = calculate_drift(ds_control_rechunked, ds_historical, 'thetao') \n",
"drift = drift.load() # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
"drift = calculate_drift(ds_control_rechunked, ds_historical, \"thetao\")\n",
"drift = drift.load() # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
"drift"
]
},
Expand Down Expand Up @@ -2116,13 +2137,18 @@
"source": [
"start = drift.trend_time_range.isel(bnds=0).data.tolist()\n",
"stop = drift.trend_time_range.isel(bnds=1).data.tolist()\n",
"time = xr.cftime_range(start, stop, freq='1MS')\n",
"time = xr.cftime_range(start, stop, freq=\"1MS\")\n",
"\n",
"# cut the control it to the time over which the trend was calculated\n",
"ds_control_cut = ds_control_rechunked.sel(time=slice(start, stop))\n",
"\n",
"# use the linear slope from the same point to construct a trendline\n",
"trendline = xr.DataArray((np.arange(len(time)) * drift.thetao.isel(**roi).data) + ds_control_cut.thetao.isel(**roi, time=0).data, dims=['time'], coords={'time':time})"
"trendline = xr.DataArray(\n",
" (np.arange(len(time)) * drift.thetao.isel(**roi).data)\n",
" + ds_control_cut.thetao.isel(**roi, time=0).data,\n",
" dims=[\"time\"],\n",
" coords={\"time\": time},\n",
")"
]
},
{
Expand Down Expand Up @@ -2205,7 +2231,9 @@
}
],
"source": [
"control_detrended = remove_trend(ds_control, drift, 'thetao', ref_date=str(ds_control.time.data[0]))\n",
"control_detrended = remove_trend(\n",
" ds_control, drift, \"thetao\", ref_date=str(ds_control.time.data[0])\n",
")\n",
"control_detrended.isel(**roi).plot()"
]
},
Expand Down Expand Up @@ -2247,7 +2275,9 @@
}
],
"source": [
"ds_historical_dedrifted = remove_trend(ds_historical, drift, 'thetao', ref_date=str(ds_historical.time.data[0]))\n",
"ds_historical_dedrifted = remove_trend(\n",
" ds_historical, drift, \"thetao\", ref_date=str(ds_historical.time.data[0])\n",
")\n",
"ds_historical_dedrifted.isel(**roi).plot()"
]
},
Expand Down Expand Up @@ -2866,7 +2896,7 @@
}
],
"source": [
"ds_historical_dedrifted.attrs['drift_removed']"
"ds_historical_dedrifted.attrs[\"drift_removed\"]"
]
},
{
Expand Down Expand Up @@ -2930,10 +2960,10 @@
"ds_ssp585_dedrifted = remove_trend(\n",
" ds_ssp585,\n",
" drift,\n",
" 'thetao',\n",
" ref_date=str(ds_historical.time.data[0]) \n",
" # Note that the ref_date is still the first time point of the *historical*run. \n",
" # This ensures that the scenario is treated as an extension of the historical \n",
" \"thetao\",\n",
" ref_date=str(ds_historical.time.data[0]),\n",
" # Note that the ref_date is still the first time point of the *historical*run.\n",
" # This ensures that the scenario is treated as an extension of the historical\n",
" # run and the offset is calculated appropriately\n",
")"
]
Expand Down Expand Up @@ -2968,10 +2998,16 @@
}
],
"source": [
"ds_historical.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0', label='raw data')\n",
"ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0')\n",
"ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1', label='control drift removed')\n",
"ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1')"
"ds_historical.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(\n",
" color=\"C0\", label=\"raw data\"\n",
")\n",
"ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(color=\"C0\")\n",
"ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
" color=\"C1\", label=\"control drift removed\"\n",
")\n",
"ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
" color=\"C1\"\n",
")"
]
},
{
Expand Down
Loading

0 comments on commit 125adff

Please sign in to comment.