diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6b0112a..7d13ee4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -178,19 +178,11 @@ jobs: cache-environment: true cache-downloads: true - # - name: Install miniconda - # uses: conda-incubator/setup-miniconda@v3 - # with: - # python-version: ${{ matrix.python-version }} - # environment-file: "environment-pywatershed.yml" - # activate-environment: "gw3099pws" - # auto-activate-base: false - - name: Install pywatershed working-directory: exercises/pywatershed run: | - git clone https://github.com/EC-USGS/pywatershed.git - cd pywatershed + git clone https://github.com/EC-USGS/pywatershed.git pywatershed_repo + cd pywatershed_repo git checkout feat_gw3099 pip install -e . diff --git a/.gitignore b/.gitignore index 0c5364b..edf7d22 100644 --- a/.gitignore +++ b/.gitignore @@ -181,11 +181,11 @@ exercises-completed/gwt/ex*/ exercises-completed/gwe/gwe-ex0-stallman/ exercises-completed/parallel/base/ exercises-completed/parallel/parallel/ -exercises/pywatershed/pywatershed/ +exercises/pywatershed/pywatershed_repo exercises/pywatershed/step1_processes/ exercises/pywatershed/step2_multi-process_models/ -exercises-completed/pywatershed/step1_processes/ -exercises-completed/pywatershed/step2_multi-process_models/ +exercises/pywatershed/step3_mf6_api/ +exercises/pywatershed/step4_flow_graph/ exercises-working/ # presentations diff --git a/.scripts/test_notebooks.py b/.scripts/test_notebooks.py index 2122342..e0babba 100644 --- a/.scripts/test_notebooks.py +++ b/.scripts/test_notebooks.py @@ -19,7 +19,6 @@ pl.Path("../exercises/modflowapi/").resolve(), pl.Path("../exercises-completed/modflowapi/").resolve(), pl.Path("../exercises/parallel/").resolve(), - pl.Path("../exercises-completed/pywatershed/").resolve(), pl.Path("../exercises/netcdf/").resolve(), pl.Path("../exercises/PEST/notebooks").resolve(), pl.Path("../exercises/pywatershed").resolve(), @@ -28,6 +27,7 @@ "step0_netcdf_output": ("win32",), "step1_netcdf_input": ("win32",), "step2_netcdf_ncf": ("win32",), + "step3_mf6_api": ("win32"), } GIT_RESET_DIRS = ( diff --git a/exercises/pywatershed/gsflow_schematic.png b/exercises/pywatershed/gsflow_schematic.png new file mode 100644 index 0000000..29817c2 Binary files /dev/null and b/exercises/pywatershed/gsflow_schematic.png differ diff --git a/exercises/pywatershed/helpers.py b/exercises/pywatershed/helpers.py new file mode 100644 index 0000000..a51bc92 --- /dev/null +++ b/exercises/pywatershed/helpers.py @@ -0,0 +1,33 @@ +import pathlib as pl + +from IPython.core.magic import register_cell_magic + + +@register_cell_magic +def do_not_run_this_cell(line, cell): + return + + +def read_yaml(yaml_file: pl.Path) -> dict: + import yaml + + with pl.Path(yaml_file).open("r") as file_stream: + yaml_dict = yaml.load(file_stream, Loader=yaml.Loader) + return yaml_dict + + +def write_yaml(yaml_dict: dict, yaml_file: pl.Path): + import yaml + + with open(yaml_file, "w") as file: + _ = yaml.dump(yaml_dict, file) + return None + + +def help_head(what, n=22): + """Equivalent to help() but we get the multiline string and just look the first n lines.""" + import pydoc + + the_help = pydoc.render_doc(what, "Help on %s") + print("\n".join(the_help.splitlines()[0:n])) + return diff --git a/exercises/pywatershed/hughes_et_al_sagehen_3panel.jpg b/exercises/pywatershed/hughes_et_al_sagehen_3panel.jpg new file mode 100644 index 0000000..ff1b58e Binary files /dev/null and b/exercises/pywatershed/hughes_et_al_sagehen_3panel.jpg 
differ diff --git a/exercises/pywatershed/sagehen_schematic_pws.png b/exercises/pywatershed/sagehen_schematic_pws.png new file mode 100644 index 0000000..7195f2c Binary files /dev/null and b/exercises/pywatershed/sagehen_schematic_pws.png differ diff --git a/exercises/pywatershed/step0_intro_to_pywatershed.ipynb b/exercises/pywatershed/step0_intro_to_pywatershed.ipynb index 908f403..45f7ef4 100644 --- a/exercises/pywatershed/step0_intro_to_pywatershed.ipynb +++ b/exercises/pywatershed/step0_intro_to_pywatershed.ipynb @@ -84,13 +84,13 @@ "\n", "```\n", "cd /GW3099-2024/exercises/pywatershed\n", - "git clone https://github.com/EC-USGS/pywatershed.git\n", - "cd pywatershed\n", + "git clone https://github.com/EC-USGS/pywatershed.git pywatershed_repo\n", + "cd pywatershed_repo\n", "git checkout feat_gw3099\n", "pip install -e .\n", "```\n", "\n", - "When you see that version 3.0 is available at [https://github.com/EC-USGS/pywatershed/releases](https://github.com/EC-USGS/pywatershed/releases), please delete the repository at `/GW3099-2024/exercises/pywatershed/pywatershed` and repeat the above steps but checking out `git checkout v3.0.0` instead of what is listed above. Or, at that time, you can also simply `conda install pywatershed`. \n", + "When you see that version 3.0 is available at [https://github.com/EC-USGS/pywatershed/releases](https://github.com/EC-USGS/pywatershed/releases), please delete the repository at `/GW3099-2024/exercises/pywatershed/pywatershed_repo` and repeat the above steps but checking out `git checkout v3.0.0` instead of what is listed above. Or, at that time, you can also simply `conda install pywatershed`. \n", "\n", "## Learn more about pywatershed\n", "\n", diff --git a/exercises-completed/pywatershed/step1_processes.ipynb b/exercises/pywatershed/step1_processes.ipynb similarity index 97% rename from exercises-completed/pywatershed/step1_processes.ipynb rename to exercises/pywatershed/step1_processes.ipynb index b37195d..cce7625 100644 --- a/exercises-completed/pywatershed/step1_processes.ipynb +++ b/exercises/pywatershed/step1_processes.ipynb @@ -12,7 +12,8 @@ "\n", "![title](../../images/ClassLocation.jpg)\n", "\n", - "# Physical process models in pywatershed" + "# Physical process models in pywatershed\n", + "*(Note that this notebook follows nearly identically to the notebook in the pywatershed repository [examples/00_processes.ipynb](https://github.com/EC-USGS/pywatershed/blob/develop/examples/00_processes.ipynb))*" ] }, { @@ -61,7 +62,6 @@ "outputs": [], "source": [ "import pathlib as pl\n", - "import pydoc\n", "from pprint import pprint\n", "\n", "import hvplot.xarray # noqa\n", @@ -69,6 +69,7 @@ "import numpy as np\n", "import pywatershed as pws\n", "import xarray as xr\n", + "from helpers import help_head\n", "from pywatershed.utils import gis_files\n", "from tqdm.notebook import tqdm\n", "\n", @@ -96,10 +97,7 @@ "metadata": {}, "outputs": [], "source": [ - "# this is equivalent to help() but we get the multiline string and just look at part of it\n", - "prms_gw_help = pydoc.render_doc(pws.PRMSGroundwater, \"Help on %s\")\n", - "# the first 22 lines of help(pws.PRMSGroundwater)\n", - "print(\"\\n\".join(prms_gw_help.splitlines()[0:22]))" + "help_head(pws.PRMSGroundwater, n=22)" ] }, { diff --git a/exercises-completed/pywatershed/step2_multi-process_models.ipynb b/exercises/pywatershed/step2_multi-process_models.ipynb similarity index 51% rename from exercises-completed/pywatershed/step2_multi-process_models.ipynb rename to 
exercises/pywatershed/step2_multi-process_models.ipynb index 1ef727b..135dde3 100644 --- a/exercises-completed/pywatershed/step2_multi-process_models.ipynb +++ b/exercises/pywatershed/step2_multi-process_models.ipynb @@ -12,7 +12,8 @@ "\n", "![title](../../images/ClassLocation.jpg)\n", "\n", - "# Multi-process models in pywatershed" + "# Multi-process models in pywatershed\n", + "*(Note that this notebook follows the notebook in the pywatershed repository [examples/01_multi-process_models.ipynb](https://github.com/EC-USGS/pywatershed/blob/develop/examples/01_multi-process_models.ipynb) but it deviates in some of the details covered.)*" ] }, { @@ -27,7 +28,9 @@ "source": [ "In notebook [`step1_processes.ipynb`](step1_processes.ipynb), we looked at how individual Process representations work and are designed. In this notebook we learn how to put multiple `Processes` together into composite models using the `Model` class. \n", "\n", - "The starting point for the development of `pywatershed` was the National Hydrologic Model (NHM, Regan et al., 2018) configuration of the Precipitation-Runoff Modeling System (PRMS, Regan et al., 2015). In this notebook, we'll first construct a full NHM configuration. The spatial domain we'll use will again be the Delaware River Basin. Once we construct the full NHM, we'll look at how we can also construct sub-models of the NHM.\n", + "The starting point for the development of `pywatershed` was the National Hydrologic Model (NHM, Regan et al., 2018) configuration of the Precipitation-Runoff Modeling System (PRMS, Regan et al., 2015). In this notebook, we'll first construct a full NHM configuration. We will again use the spatial domain of the Delaware River Basin. Once we construct the full NHM, we'll look at how we can also construct sub-models of the NHM.\n", + "\n", + "Along the way, we'll get into some of the guts of using pywatershed.\n", "\n", "## Prerequisites" ] @@ -40,7 +43,7 @@ "outputs": [], "source": [ "import pathlib as pl\n", - "import pydoc\n", + "import shutil\n", "from copy import deepcopy\n", "from platform import processor\n", "from pprint import pprint\n", @@ -52,14 +55,19 @@ "import pywatershed as pws\n", "import xarray as xr\n", "import yaml\n", + "from helpers import do_not_run_this_cell, help_head, read_yaml, write_yaml\n", "from pywatershed.utils import gis_files\n", "from pywatershed.utils.path import dict_pl_to_str\n", + "from tqdm.notebook import tqdm\n", "\n", "jupyter_black.load() # auto-format the code in this notebook\n", "\n", - "pws.utils.gis_files.download() # this downloads GIS files\n", + "pws.utils.gis_files.download()\n", + "\n", + "pkg_root_dir = pws.constants.__pywatershed_root__\n", + "repo_root_dir = pkg_root_dir.parent\n", "\n", - "pkg_root_dir = pws.constants.__pywatershed_root__" + "nb_output_dir = pl.Path(\"./step2_multi-process_models\")" ] }, { @@ -79,8 +87,6 @@ "metadata": {}, "outputs": [], "source": [ - "nb_output_dir = pl.Path(\"./step2_multi-process_models\")\n", - "\n", "domain_dir = pkg_root_dir / \"data/drb_2yr\"\n", "\n", "domain_gis_dir = pkg_root_dir / \"data/pywatershed_gis/drb_2yr\"\n", @@ -157,7 +163,7 @@ "source": [ "We'll use this list of classes shortly to construct the NHM.\n", "\n", - "A multi-process model is assembled by the `Model` class. We can take a quick look at the first 21 lines of help on `Model`:" + "A multi-process model is assembled by the `Model` class. 
We can take a quick look at the first 22 lines of help on `Model`:" ] }, { @@ -167,10 +173,7 @@ "metadata": {}, "outputs": [], "source": [ - "# this is equivalent to help() but we get the multiline string and just look at part of it\n", - "model_help = pydoc.render_doc(pws.Model, \"Help on %s\")\n", - "# the first 22 lines of help(pws.Model)\n", - "print(\"\\n\".join(model_help.splitlines()[0:22]))" + "help_head(pws.Model, n=22)" ] }, { @@ -182,8 +185,7 @@ "\n", "With the pywatershed-centric approach, the first argument is a \"model dictionary\" which does nearly all the work (the other arguments will be their default values). The `help()` describes the model dictionary and provides examples. Please use it for reference and more details. Here we'll give an extended concrete example. The `help()` also describes how a `Model` can be instantiated from a model dictionary contained in a YAML file. First, we'll build a model dictionary in memory, then we'll write it out as a yaml file and instantiate our model directly from the YAML file. \n", "\n", - "## Model dictionary in memory\n", - "\n", + "### Construct the model specification in memory\n", "Because our (pre-existing) parameter files (which come with `pywatershed`) and our `Process` classes are consistently named, we can begin to build the model dictionary quickly." ] }, @@ -243,7 +245,7 @@ "id": "059641b6-8fbb-49b4-a95f-4e7fd29af3a6", "metadata": {}, "source": [ - "We have given a name to each process and then supplied the class, its parameters, and its discretization for the full set of processes. Now we'll need to add the discretizations to the model dictionary. They are added at the top level and correspond to the names the processes used. " + "We have given a name to each process and then supplied the class, its parameters, and its discretization for the full set of processes. Now we'll need to add the discretizations to the model dictionary. They are added at the top level and correspond to `dis` the names the processes used. " ] }, { @@ -276,9 +278,7 @@ "source": [ "For the time being, `PRMSChannel` needs to know about both HRUs and segments, so `dis_both` is used. We plan to remove this requirement in the near future by implementing \"exchanges\" between processes into the model dictionary.\n", "\n", - "You may have noticed that we are missing a `Control` object to provide time information to the processes. We'll create it and we'll also create a list of the order that the processes are executed.\n", - "\n", - "Though we have input available to run 2 years of simulation, we'll restrict the model run to the first 6 months for demonstration purposes. (Feel free to increase this to the full 2 years available, if you like.)" + "You may have noticed that we are missing a `Control` object to provide time information to the processes. We'll create it and we'll also create a list of the order that the processes are executed." 
] }, { @@ -288,14 +288,15 @@ "metadata": {}, "outputs": [], "source": [ + "run_dir = nb_output_dir / \"run_dir\"\n", "control = pws.Control(\n", " start_time=np.datetime64(\"1979-01-01T00:00:00\"),\n", - " end_time=np.datetime64(\"1979-07-01T00:00:00\"),\n", + " end_time=np.datetime64(\"1980-12-31T00:00:00\"),\n", " time_step=np.timedelta64(24, \"h\"),\n", " options={\n", " \"input_dir\": domain_dir,\n", " \"budget_type\": \"error\",\n", - " \"netcdf_output_dir\": nb_output_dir / \"nhm_memory\",\n", + " \"netcdf_output_dir\": run_dir,\n", " },\n", ")\n", "model_order = [\"prms_\" + proc.__name__[4:].lower() for proc in nhm_processes]\n", @@ -308,7 +309,9 @@ "id": "546a7ac8-c23e-4632-b655-6dc8e884172d", "metadata": {}, "source": [ - "The `model_dict` now specifies a complete model built from multiple processes. They way these processes are connected can be figured out by the `Model` class, because each process fully describes itself (as we saw in the previous notebook). If we instantiate a model from this `model_dict`," + "### Instantiate the model\n", + "\n", + "The `model_dict` above now specifies a complete model built from multiple processes. Connecting the processes is handled by the `Model` class which can figure it all out because each process fully describes itself (as we saw in the previous notebook), including its inputs and variables. If we instantiate a model from this `model_dict`," ] }, { @@ -318,7 +321,7 @@ "metadata": {}, "outputs": [], "source": [ - "model_mem = pws.Model(model_dict)" + "model = pws.Model(model_dict)" ] }, { @@ -326,7 +329,8 @@ "id": "7260278e-91ff-47af-a2c1-b9c02e8beaa1", "metadata": {}, "source": [ - "we can examine how the `Processes` are all connected using the `ModelGraph` class. We'll bring in the default color scheme for NHM `Processes`." + "### ModelGraph\n", + "Now we can examine how the `Processes` are all connected using the `ModelGraph` class. We'll bring in the default color scheme for NHM `Processes`." ] }, { @@ -341,12 +345,12 @@ }, "outputs": [], "source": [ - "palette = pws.analysis.utils.colorbrewer.nhm_process_colors(model_mem)\n", + "palette = pws.analysis.utils.colorbrewer.nhm_process_colors(model)\n", "pws.analysis.utils.colorbrewer.jupyter_palette(palette)\n", "show_params = not (platform == \"darwin\" and processor() == \"arm\")\n", "try:\n", " pws.analysis.ModelGraph(\n", - " model_mem,\n", + " model,\n", " hide_variables=False,\n", " process_colors=palette,\n", " show_params=show_params,\n", @@ -366,9 +370,9 @@ "id": "e57ed289-17ba-4925-922d-d8e6f01bbb6b", "metadata": {}, "source": [ - "### Questions:\n", + "### Questions\n", "* What are the inputs for this model and where are these found? Is there anything special about those files? Could we drive any process from file?\n", - "* Can you see from where each process gets its inputs in this model? What is the largest number of other processes a single process draws inputs from?\n", + "* Can you see where each process gets its inputs from in this model? What is the largest number of other processes a single process draws inputs from?\n", "* Are some of the arrows 2-way?\n", "* Which processes are mass conservative? Can you see the terms involved in mass conservation?\n", "* Which process has the greatest/smallest ratio of number of parameters to number of variables?" @@ -379,6 +383,7 @@ "id": "bbc28c9b-8d33-479f-b4f8-b8985d86d9c8", "metadata": {}, "source": [ + "### Run the model\n", "Now we'll initialize NetCDF output and run the model." 
] }, @@ -390,7 +395,7 @@ "outputs": [], "source": [ "%%time\n", - "model_mem.run(finalize=True)" + "model.run(finalize=True)" ] }, { @@ -398,951 +403,662 @@ "id": "9e5cd00e-46a5-4778-abc5-f3566f7fe74a", "metadata": {}, "source": [ - "We'll take a look at the outputs after we see how to implement this model using a YAML file on disk. " - ] - }, - { - "cell_type": "markdown", - "id": "2728d285-f0cd-43b0-96ff-9d019c7e0fda", - "metadata": {}, - "source": [ - "## Model dictionary yaml file\n", - "It may be preferable to have a model dictionary encoded in YAML file in many situations, instead of in Python code. Necessarily, the contents of the YAML file will look different than the code above where we had the contents of the model dictionary in memory.\n", - "\n", - "We'll create a new directory for our YAML-based run. Then we'll write the existing Control instance as a YAML file in that directory." + "Now we have a finalized run of our model. Finalizing is important mainly so that open output files are closed. We can quite easily look at all the output resulting from our run by looking at the netcdf files in the run directory. " ] }, { "cell_type": "code", "execution_count": null, - "id": "bcbdca24-7347-4ae7-91de-cee896461e08", - "metadata": {}, - "outputs": [], - "source": [ - "run_dir = pl.Path(nb_output_dir / \"nhm_yaml\")\n", - "run_dir.mkdir(exist_ok=True)\n", - "control_yaml_file = run_dir / \"control.yaml\"\n", - "control_yaml = deepcopy(control)\n", - "control_yaml.options[\"netcdf_output_dir\"] = run_dir\n", - "control_yaml.to_yaml(control_yaml_file)" - ] - }, - { - "cell_type": "markdown", - "id": "0aaa0705-ac8d-455d-9f56-8ba7b7353c44", + "id": "02f305fd-0cfa-4b89-a1fb-c32e72490b8a", "metadata": { "editable": true, "slideshow": { "slide_type": "" } }, - "source": [ - "We add the option `netcdf_output_dir` to the control since we assume we wont be able to do so at run time. Note that this option and the `input_dir` option are `pathlib.Path` objects." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8044016c-7728-4149-9be5-f52b8f43975d", - "metadata": {}, "outputs": [], "source": [ - "control" + "output_files = sorted(run_dir.glob(\"*.nc\"))\n", + "print(len(output_files))\n", + "pprint(output_files)" ] }, { "cell_type": "markdown", - "id": "c56e0f88-1842-406a-94f4-d45a918783dd", + "id": "e4a201df-6a88-4cb3-ada8-95b8f949de89", "metadata": {}, "source": [ - "But the `to_yaml()` method converts these to strings in the YAML file for us:" + "The following code will let us examine output variables, plotting the full timeseries at individual locations which can be scrolled through using the bar on the right side. It will not work to look at the out budget output files, however. Note, this plot is not a custom plot function. It is base functionality in hvplot (with an xarray backend). Because of all the work getting dimensions and metadata into the NetCDF file, the scroll on the spatial dimension is appropriately named, the y-axis is appropriately labeled with units, and the time axis looks sharp." 
] }, { "cell_type": "code", "execution_count": null, - "id": "1ad1007d-6890-4ef9-83ec-6591a589732e", + "id": "ea5e4070-6af5-4ff3-921e-fae61e3a8fce", "metadata": {}, "outputs": [], "source": [ - "with control_yaml_file.open(\"r\") as file:\n", - " for line in file:\n", - " print(line[0:-1]) # drop the line break to condense" + "var = \"seg_outflow\"\n", + "var_da = xr.load_dataarray(run_dir / f\"{var}.nc\")\n", + "var_da.hvplot(groupby=var_da.dims[1])" ] }, { "cell_type": "markdown", - "id": "a51538c2-6431-4866-adff-eb9e15acb15c", - "metadata": {}, - "source": [ - "Now we'll create the model dictionary. For the `control` field, we'll need the path to the YAML file to which we will write the information above. For discretization fields, we'll pass paths to NetCDF files instead of instantiated `Parameter` objects. For `model_order` we can supply the same list we used above. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1fe43720-4df3-4b34-b964-ba2af481c9a0", - "metadata": {}, - "outputs": [], - "source": [ - "model_dict = {\n", - " \"control\": control_yaml_file.resolve(),\n", - " \"dis_hru\": domain_dir / \"parameters_dis_hru.nc\",\n", - " \"dis_both\": domain_dir / \"parameters_dis_both.nc\",\n", - " \"model_order\": model_order,\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "69e563b5-d540-4055-8e5e-9b8cb920ddda", - "metadata": {}, - "source": [ - "Now, for each process, we'll use an arbitray name. Then we'll supply the class name (which can be imported from `pws` at the top level), the path to its NetCDF parameter file, and the name of its required discretization." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b78b326c-105b-4a70-a374-3da019993b9a", - "metadata": {}, - "outputs": [], - "source": [ - "for proc in nhm_processes:\n", - " proc_name = proc.__name__\n", - " proc_rename = \"prms_\" + proc_name[4:].lower()\n", - " model_dict[proc_rename] = {}\n", - " proc_dict = model_dict[proc_rename]\n", - " proc_dict[\"class\"] = proc_name\n", - " proc_param_file = domain_dir / f\"parameters_{proc_name}.nc\"\n", - " proc_dict[\"parameters\"] = proc_param_file\n", - " if proc_rename == \"prms_channel\":\n", - " proc_dict[\"dis\"] = \"dis_both\"\n", - " else:\n", - " proc_dict[\"dis\"] = \"dis_hru\"" - ] - }, - { - "cell_type": "markdown", - "id": "d1162367-7898-4a6a-bf05-3cf79fcfa99a", + "id": "3078346e-8b0f-48f0-b7a9-4200714b403b", "metadata": {}, "source": [ - "Before looking at it, we'll convert `Path` objects to strings using a utility in pywatershed, `dict_pl_to_str`: " + "We'll plot the last variable in the loop, `unused_potet`:" ] }, { "cell_type": "code", "execution_count": null, - "id": "c6d939a7-4842-4329-b278-d0861556ea50", - "metadata": {}, + "id": "45454b13-020b-4867-bd34-21c8cdb00834", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, "outputs": [], "source": [ - "model_dict = dict_pl_to_str(model_dict)\n", - "pprint(model_dict, sort_dicts=False)" - ] - }, - { - "cell_type": "markdown", - "id": "a6fddaf2-d3d0-46b2-b463-9708c03ab6ac", - "metadata": {}, - "source": [ - "A note on paths in the yaml file. Because we are using files in two different locations which are not easily described relative to the location of yaml file, we are using absolute paths. However, one can also describe all paths relative to the location of the yaml file if that is more suitable to your purposes. 
\n", + "%%do_not_run_this_cell\n", + "proc_plot = pws.analysis.process_plot.ProcessPlot(gis_files.gis_dir / \"drb_2yr\")\n", + "proc_classes = [model_dict[nn][\"class\"] for nn in model_order]\n", "\n", - "Finally, we have the control and model dictionaries ready to write to yaml." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7caddaf9-37f3-40a3-b054-6a275f0f3e2d", - "metadata": {}, - "outputs": [], - "source": [ - "model_dict_yaml_file = run_dir / \"model_dict.yaml\"\n", - "with open(model_dict_yaml_file, \"w\") as file:\n", - " _ = yaml.dump(model_dict, file)" - ] - }, - { - "cell_type": "markdown", - "id": "4230eeac-3feb-4534-896f-442cc9e85451", - "metadata": {}, - "source": [ - "Examine the written model dictionary YAML file:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bcc2a81f-bac2-47a6-b161-07088c96e185", - "metadata": {}, - "outputs": [], - "source": [ - "with model_dict_yaml_file.open(\"r\") as file:\n", - " for line in file:\n", - " print(line[0:-1]) # drop the line break to condense" + "\n", + "def get_var_proc_class(var_name):\n", + " for proc_class in proc_classes:\n", + " if var_name in proc_class.get_variables():\n", + " return proc_class\n", + "\n", + "\n", + "proc_plot.plot_hru_var(\n", + " var_name=var,\n", + " process=get_var_proc_class(var),\n", + " data=var_da.mean(dim=\"time\"),\n", + " data_units=var_da.attrs[\"units\"],\n", + " nhm_id=var_da[\"nhm_id\"],\n", + ")" ] }, { "cell_type": "markdown", - "id": "b3ec433f-908e-4519-9abf-9e5527d1844c", - "metadata": {}, - "source": [ - "Now we can create a `Model` from these:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7891df03-efd2-44b5-9a9e-3ba61a72fa23", + "id": "c97d3fff-4a83-4a4e-a22f-ed5da54412cd", "metadata": {}, - "outputs": [], "source": [ - "model_yaml = pws.Model.from_yaml(model_dict_yaml_file)\n", - "model_yaml" + "We can also make a spatial plot of the streamflow using a transform for line width representation. " ] }, { "cell_type": "code", "execution_count": null, - "id": "9e5709af-c7bf-422b-a3ff-559bf62d669f", + "id": "50bc3067-9f56-4483-9ce6-d6fe8b1e0f09", "metadata": {}, "outputs": [], "source": [ - "show_params = not (platform == \"darwin\" and processor() == \"arm\")\n", - "try:\n", - " pws.analysis.ModelGraph(\n", - " model_yaml,\n", - " hide_variables=False,\n", - " process_colors=palette,\n", - " show_params=show_params,\n", - " ).SVG(verbose=True, dpi=48)\n", - "except:\n", - " static_url = \"https://github.com/EC-USGS/pywatershed/releases/download/1.1.0/notebook_01_cell_11_model_graph.png\"\n", - " print(\n", - " f\"Dot fails on some machines. 
You can see the graph at this url: {static_url}\"\n", - " )\n", - " from IPython.display import Image\n", + "# var = \"seg_outflow\"\n", + "# var_da = xr.open_dataarray(run_dir / f\"{var}.nc\")\n", "\n", - " display(Image(url=static_url, width=1300))" + "# def xform_width(vals):\n", + "# flow_log = np.maximum(np.log(vals + 1.0e-4), 0.0)\n", + "# width_max = 5\n", + "# width_min = 0.2\n", + "# flow_log_lw = (width_max - width_min) * (flow_log - np.min(flow_log)) / (\n", + "# np.max(flow_log) - np.min(flow_log)\n", + "# ) + width_min\n", + "# return flow_log_lw\n", + "\n", + "\n", + "# proc_plot.plot(\n", + "# var,\n", + "# process=get_var_proc_class(var),\n", + "# value_transform=xform_width,\n", + "# data=var_da.mean(dim=\"time\"),\n", + "# title=f\"{var}\",\n", + "# aesthetic_width=True,\n", + "# )\n", + "\n", + "# #proc_plot.plot(var_name, proc, title=var_name)" ] }, { "cell_type": "markdown", - "id": "c78d3fa6-722b-404f-a60f-6778dbb8109a", + "id": "2ae49ade-3c8b-4fe8-af5a-9497f856151b", "metadata": {}, "source": [ - "That looks identical to the `model_mem` model constructed previously. Let's run the model." + "### Reduce model output to disk\n", + "Quite a lot of output was written in the above example. In many cases, the amount of model output can be reduced in favor of imporving/reducing model run time. In the next cell, we show how you would reduce the output by setting `control.options['netcdf_output_var_names]`. We'll suppose that we only want the output variables from the `PRMSGroundwater` and `PRMSChannel` processes. Note that we are just combining the variable names returned by these two processes' `.get_variables()` methods. However, we could specify any list of variable names we like (variable names not present in the model are ignored silently, so spelling obviously matters). We dont run this cell, we just show what code you'd change above." ] }, { "cell_type": "code", "execution_count": null, - "id": "842a2b97-fb2e-4413-8d32-c8aa08b31dc6", + "id": "284d8184-9837-4c01-97db-ea1e53f956ac", "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "model_yaml.run()\n", - "model_yaml.finalize()" + "%%do_not_run_this_cell\n", + "desired_output = [\n", + " *pws.PRMSGroundwater.get_variables(),\n", + " *pws.PRMSChannel.get_variables(),\n", + "]control_cp.options[\"netcdf_output_var_names\"] = desired_output" ] }, { "cell_type": "markdown", - "id": "4e68f6e7-f707-4012-a74b-fc85020ff86a", - "metadata": {}, - "source": [ - "## Compare the outputs of the two models\n", - "Let's see that these constructed and executed the same models." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "02f305fd-0cfa-4b89-a1fb-c32e72490b8a", + "id": "b420c326-be66-41df-82f1-3c332c1f85ea", "metadata": { "editable": true, "slideshow": { "slide_type": "" } }, - "outputs": [], "source": [ - "mem_out_dir = nb_output_dir / \"nhm_memory\"\n", - "yaml_out_dir = nb_output_dir / \"nhm_yaml\"\n", - "mem_files = sorted(mem_out_dir.glob(\"*.nc\"))\n", - "yaml_files = sorted(yaml_out_dir.glob(\"*.nc\"))\n", - "# We get all the same output files\n", - "assert set([ff.name for ff in mem_files]) == set(\n", - " [ff.name for ff in yaml_files]\n", - ")" + "When I reduce the original ~150 output files to just those specified in the above cell, run time is reduced by about 60% on my Mac." 
] }, { "cell_type": "markdown", - "id": "1024fe62-ac86-4171-8c45-018f1ac35902", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, - "source": [ - "Now compare the values of all variables:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "523633e0-bb6e-4fa7-8a7a-bd2de3d602b0", + "id": "89e3d4c4-8bea-44e2-be76-b274bf04c3d7", "metadata": {}, - "outputs": [], "source": [ - "nb_output_dir.resolve()" + "## NHM Submodel for the Delaware River Basin \n", + "In many cases, running the full NHM model may not be necessary and it may be advantageous to just run some of the processes in it. Pywatershed gives you this flexibility. Suppose you wanted to change parameters or model process representation in just the PRMSSoilzone to better predict streamflow. As the model is 1-way coupled, you can simply run a submodel starting with PRMSSoilzone and running through PRMSChannel. \n", + "\n", + "In this example we'll construct our model using YAML file, instead of in memory as above. To see how this works, we'll start from a YAML file that specifies the full NHM that we ran above." ] }, { "cell_type": "code", "execution_count": null, - "id": "db0d920e-0227-45ba-92c6-8ffacab9ca3c", + "id": "cd19a089-4828-4d80-9b0d-77133a93f3a2", "metadata": {}, "outputs": [], "source": [ - "for mf, yf in zip(mem_files, yaml_files):\n", - " var = mf.with_suffix(\"\").name\n", - "\n", - " if \"budget\" in var.lower():\n", - " continue\n", - "\n", - " mda = xr.open_dataset(mf)[var]\n", - " yda = xr.open_dataset(yf)[var]\n", - " xr.testing.assert_equal(mda, yda)\n", - " mda.close()\n", - " yda.close()" + "model_dict_yaml_file = repo_root_dir / \"test_data/drb_2yr/nhm_model.yaml\"\n", + "model_dict_yaml = read_yaml(model_dict_yaml_file)\n", + "display(model_dict_yaml)" ] }, { "cell_type": "markdown", - "id": "3078346e-8b0f-48f0-b7a9-4200714b403b", + "id": "0f64163a-339e-4539-bc4f-00adad1d2ae6", "metadata": {}, "source": [ - "We'll plot the last variable in the loop, `unused_potet`:" + "We can see above that a YAML file specifies data via a control YAML files and all other data via NetCDF files. All other fields are strings. \n", + "\n", + "Let's write our own YAML file for our submodel. Files specified with relative paths are relative the location of the YAML file itself. We want to put this YAML file in to a new run directory, so we'll want to supply paths to existing files and since we dont want/need to copy those, we'll use absolute paths. All `pl.Path`s must be converted to `str`s in the YAML representation. 
" ] }, { "cell_type": "code", "execution_count": null, - "id": "7213a424-db51-4f36-974f-56c2b4ea049d", + "id": "be9e1807-bed4-4c52-99e0-b5a58e63eaf7", "metadata": {}, "outputs": [], "source": [ - "mda.hvplot(groupby=\"nhm_id\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "45454b13-020b-4867-bd34-21c8cdb00834", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, - "outputs": [], - "source": [ - "proc_plot = pws.analysis.process_plot.ProcessPlot(\n", - " gis_files.gis_dir / \"drb_2yr\"\n", - ")\n", - "proc_plot.plot_hru_var(\n", - " var_name=var,\n", - " process=pws.PRMSAtmosphere,\n", - " data=mda.mean(dim=\"time\"),\n", - " data_units=mda.attrs[\"units\"],\n", - " nhm_id=mda[\"nhm_id\"],\n", - ")" + "model_dict_new = {\n", + " \"control\": \"nhm_control.yaml\",\n", + " \"dis_hru\": str(domain_dir / \"parameters_dis_hru.nc\"),\n", + " \"dis_both\": str(domain_dir / \"parameters_dis_both.nc\"),\n", + " \"soilzone\": {\n", + " \"class\": \"PRMSSoilzone\",\n", + " \"parameters\": str(domain_dir / \"parameters_PRMSSoilzone.nc\"),\n", + " \"dis\": \"dis_hru\",\n", + " },\n", + " \"groundwater\": {\n", + " \"class\": \"PRMSGroundwater\",\n", + " \"parameters\": str(domain_dir / \"parameters_PRMSGroundwater.nc\"),\n", + " \"dis\": \"dis_hru\",\n", + " },\n", + " \"channel\": {\n", + " \"class\": \"PRMSChannel\",\n", + " \"parameters\": str(domain_dir / \"parameters_PRMSChannel.nc\"),\n", + " \"dis\": \"dis_both\",\n", + " },\n", + " \"model_order\": [\"soilzone\", \"groundwater\", \"channel\"],\n", + "}" ] }, { "cell_type": "markdown", - "id": "c97d3fff-4a83-4a4e-a22f-ed5da54412cd", + "id": "c547f876-b323-4955-9394-40abbaba32fe", "metadata": {}, "source": [ - "We can also make a spatial plot of the streamflow at the final time. " + "We'll need to place a control YAML file in our run dir since that's where we said it would be. We'll use a control YAML file that is used for running the full model as a staring point. But will we need to edit it? Let's take a look." 
] }, { "cell_type": "code", "execution_count": null, - "id": "50bc3067-9f56-4483-9ce6-d6fe8b1e0f09", + "id": "5cc0f5d0-42ec-42bc-927b-bf3424568f3b", "metadata": {}, "outputs": [], "source": [ - "proc_plot = pws.analysis.ProcessPlot(gis_files.gis_dir / \"drb_2yr\")\n", - "proc_name = \"prms_channel\"\n", - "var_name = \"seg_outflow\"\n", - "proc = model_yaml.processes[proc_name]\n", - "\n", - "\n", - "def xform(vals):\n", - " return np.log(vals + 1.0e-4)\n", - "\n", - "\n", - "def xform_width(vals):\n", - " flow_log = np.maximum(np.log(vals + 1.0e-4), 0.0)\n", - " width_max = 5\n", - " width_min = 0.2\n", - " flow_log_lw = (width_max - width_min) * (flow_log - np.min(flow_log)) / (\n", - " np.max(flow_log) - np.min(flow_log)\n", - " ) + width_min\n", - " return flow_log_lw\n", - "\n", - "\n", - "proc_plot.plot(\n", - " var_name,\n", - " proc,\n", - " value_transform=xform_width,\n", - " title=f\"log {var_name}\",\n", - " aesthetic_width=True,\n", - ")\n", - "proc_plot.plot(var_name, proc, title=var_name)" + "model_control_yaml_file = repo_root_dir / \"test_data/drb_2yr/nhm_control.yaml\"\n", + "model_control_yaml = read_yaml(model_control_yaml_file)\n", + "display(model_control_yaml)" ] }, { "cell_type": "markdown", - "id": "2ae49ade-3c8b-4fe8-af5a-9497f856151b", + "id": "fddbfc6f-ba55-4826-a755-4bba96e2cbe9", "metadata": {}, "source": [ - "## Reduce model output to disk\n", - "It's worth noting that quite a lot of output was written and that in many cases the amount of output can be reduced in favor of imporving/reducing model run time. Let's show how easily that can be done.\n", - "\n", - "Because we want to reuse the above control dict and model dict for the submodel demonstration in the next section, we'll deep copy them for this purpose." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25f00265-1d4c-462e-9be1-bf8cd82261ed", - "metadata": {}, - "outputs": [], - "source": [ - "run_dir = pl.Path(nb_output_dir / \"yaml_less_output\").resolve()\n", - "run_dir.mkdir(exist_ok=True)" + "Looking closely at this, we'll notice that `input_dir` is not specified. Trying to instantiate a model will throw an error telling us this. But where do we get our inputs? What are our inputs? What were our inputs above? Maybe, let's try that same directory we used for the full model." 
] }, { "cell_type": "code", "execution_count": null, - "id": "284d8184-9837-4c01-97db-ea1e53f956ac", + "id": "e1a9283a-491e-4a87-83de-0df354ed1573", "metadata": {}, "outputs": [], "source": [ - "control_cp = deepcopy(control)\n", - "control_cp.options[\"netcdf_output_dir\"] = str(run_dir.resolve())\n", - "control_cp.options[\"netcdf_output_var_names\"] = [\n", - " var\n", - " for ll in [\n", - " pws.PRMSGroundwater.get_variables(),\n", - " pws.PRMSChannel.get_variables(),\n", - " ]\n", - " for var in ll\n", - "]\n", - "print(control_cp) # .to_dict(), sort_dicts=False)\n", - "\n", - "control_yaml_file = run_dir / \"control.yaml\"\n", - "control_cp.to_yaml(control_yaml_file)" + "model_control_yaml[\"input_dir\"] = str(control.options[\"input_dir\"])\n", + "run_dir_submodel = nb_output_dir / \"run_dir_submodel\"\n", + "run_dir_submodel.mkdir(exist_ok=True)\n", + "model_control_yaml[\"netcdf_output_dir\"] = str(run_dir_submodel)" ] }, { "cell_type": "markdown", - "id": "4eea5150-fe9a-4c6d-95bd-9ed80c061457", + "id": "eb81e3de-a1be-4bb1-93ae-b95090dedb42", "metadata": {}, "source": [ - "Now we will use the existing `model_dict` in memory, tayloring to the above and just keeping the processes of interest in the submodel." + "Now let's write out our model and control YAML files." ] }, { "cell_type": "code", "execution_count": null, - "id": "b02da6d0-e397-4a88-aff5-c4c83e8f1a13", + "id": "76da14c3-6571-4db6-a263-6fa8afd5e5cf", "metadata": {}, "outputs": [], "source": [ - "model_dict_copy = deepcopy(model_dict)\n", - "model_dict_copy[\"control\"] = str(control_yaml_file)\n", - "model_dict_yaml_file = run_dir / \"model_dict.yaml\"" + "submodel_yaml_file = run_dir_submodel / \"submodel.yaml\"\n", + "write_yaml(model_dict_new, submodel_yaml_file)\n", + "write_yaml(\n", + " model_control_yaml, run_dir_submodel / \"nhm_control.yaml\"\n", + ") # as specified in model_dict_new" ] }, { "cell_type": "markdown", - "id": "020a6a11-c36b-4f4b-bca5-879105083fe5", + "id": "6c502d9b-3074-4a1d-ac3f-624f19c6419d", "metadata": {}, "source": [ - "Now we write both the control and model dictionary to yaml files." + "We'll run the model from YAML files." ] }, { "cell_type": "code", "execution_count": null, - "id": "051df876-2e37-4857-976b-ef6c0d586985", + "id": "2963ea8b-8385-46f9-860f-2cfff55564d1", "metadata": {}, "outputs": [], "source": [ - "with open(model_dict_yaml_file, \"w\") as file:\n", - " _ = yaml.dump(model_dict_copy, file)" + "try:\n", + " submodel = pws.Model.from_yaml(submodel_yaml_file)\n", + "except Exception as error:\n", + " print(\"An exception occurred:\", error) #" ] }, { "cell_type": "markdown", - "id": "2a275786-239b-4f8a-9643-e65ad6ef9dc8", - "metadata": {}, - "source": [ - "And finally we instantiate the reduced output from the model dictionary yaml file. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2cc3f3f6-3d5c-4e4f-84c4-5203db595dc0", + "id": "b8ff220a-95bf-4044-9d77-76d1d1db1b0d", "metadata": {}, - "outputs": [], "source": [ - "model_less_output = pws.Model.from_yaml(model_dict_yaml_file)\n", - "model_less_output" + "We got an error that the `potet.nc` file was not found. What is going on? Why is that an input file? Let's take a look at the `ModelGraph` for this submodel." 
] }, { "cell_type": "code", "execution_count": null, - "id": "0f62c723-28ae-4db0-939e-03e477c6f2e8", + "id": "7a06dd22-e240-4b17-b60f-0a1fb42dca51", "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "model_less_output.run(finalize=True)" - ] - }, - { - "cell_type": "markdown", - "id": "b420c326-be66-41df-82f1-3c332c1f85ea", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, - "source": [ - "Reducing the output significantly reduced the time, in this case (on my machine) from 25s to 15s, or about 60%." + "show_params = not (platform == \"darwin\" and processor() == \"arm\")\n", + "try:\n", + " pws.analysis.ModelGraph(\n", + " submodel,\n", + " hide_variables=False,\n", + " process_colors=palette,\n", + " show_params=show_params,\n", + " ).SVG(verbose=True, dpi=48)\n", + "except:\n", + " static_url = \"https://github.com/EC-USGS/pywatershed/releases/download/1.1.0/notebook_01_cell_45_submodel_graph.png\"\n", + " print(\n", + " f\"Dot fails on some machines. You can see the graph at this url: {static_url}\"\n", + " )\n", + " from IPython.display import Image\n", + "\n", + " display(Image(url=static_url, width=700))" ] }, { "cell_type": "markdown", - "id": "89e3d4c4-8bea-44e2-be76-b274bf04c3d7", + "id": "c367045a-34f8-4541-8f78-ea886e6fe29e", "metadata": {}, "source": [ - "## NHM Submodel for the Delaware River Basin \n", - "In many cases, running the full NHM model may not be necessary and it may be advantageous to just run some of the processes in it. Pywatershed gives you this flexibility. Suppose you wanted to change parameters or model process representation in the PRMSSoilzone to better predict streamflow. As the model is 1-way coupled, you can simply run a submodel starting with PRMSSoilzone and running through PRMSChannel." + "OK, the submodel has a different set of inputs that the `ModelGraph` clearly shows. That's cool, but where will we find those files? Remember when we ran the full model above? Maybe it output the required inputs? How could we check this?" ] }, { "cell_type": "code", "execution_count": null, - "id": "be9e1807-bed4-4c52-99e0-b5a58e63eaf7", + "id": "43cd7229-49ad-4bce-b097-930660f8d90f", "metadata": {}, "outputs": [], "source": [ - "submodel_processes = [pws.PRMSSoilzone, pws.PRMSGroundwater, pws.PRMSChannel]" - ] - }, - { - "cell_type": "markdown", - "id": "cd91adfe-a875-4873-b8ba-85778a7cb651", - "metadata": {}, - "source": [ - "This prompts the question, what inputs/forcing data do we need for this submodel? We can ask each individual process for its inputs" + "all_inputs = [\n", + " *pws.PRMSSoilzone.get_inputs(),\n", + " *pws.PRMSRunoff.get_inputs(),\n", + " *pws.PRMSChannel.get_inputs(),\n", + "]\n", + "all_run_output_names = [ff.name[0:-3] for ff in sorted(run_dir.glob(\"*.nc\"))]" ] }, { "cell_type": "code", "execution_count": null, - "id": "cdbc5725-799e-435c-8fdd-86bce771bdd5", + "id": "240ed33b-9596-459a-8e5d-6cfa8bacd144", "metadata": {}, "outputs": [], "source": [ - "submodel_input_dict = {\n", - " pp.__name__: pp.get_inputs() for pp in submodel_processes\n", - "}\n", - "pprint(submodel_input_dict)" + "set(all_inputs).difference(set(all_run_output_names))" ] }, { "cell_type": "markdown", - "id": "002e6e21-c16d-404f-9910-e764f18215b9", + "id": "d24535f1-3293-491e-b681-9ff2df29e2cd", "metadata": {}, "source": [ - "And which inputs are supplied by variables within this submodel? We ask each process for its variables." + "Oh snap! All the inputs files are available from the first run. 
Let's fix our control's `input_dir`. " ] }, { "cell_type": "code", "execution_count": null, - "id": "20b97c7e-7d2e-4279-a7ee-b674679a180d", + "id": "87269149-5430-4326-8406-8b07cd446755", "metadata": {}, "outputs": [], "source": [ - "submodel_vars_dict = {\n", - " pp.__name__: pp.get_variables() for pp in submodel_processes\n", - "}\n", - "pprint(submodel_vars_dict)" - ] - }, - { - "cell_type": "markdown", - "id": "3ed052fe-a46d-4470-8df5-bf03e2ecbb7d", - "metadata": {}, - "source": [ - "We consolidate inputs and variables (each over all processes) and take a set difference of inputs and variables to know what inputs/forcings we need from file. " + "model_control_yaml[\"input_dir\"] = str(run_dir.resolve())\n", + "write_yaml(\n", + " model_control_yaml, run_dir_submodel / \"nhm_control.yaml\"\n", + ") # as specified in model_dict_new" ] }, { "cell_type": "code", "execution_count": null, - "id": "b99082da-cd5d-4c98-b57c-f716ebd01f77", + "id": "37c4af3a-252a-4c5b-8f0a-41e73f039937", "metadata": {}, "outputs": [], "source": [ - "submodel_inputs = set([ii for tt in submodel_input_dict.values() for ii in tt])\n", - "submodel_variables = set(\n", - " [ii for tt in submodel_vars_dict.values() for ii in tt]\n", - ")\n", - "submodel_file_inputs = tuple(submodel_inputs - submodel_variables)\n", - "pprint(submodel_file_inputs)" + "submodel = pws.Model.from_yaml(submodel_yaml_file)" ] }, { "cell_type": "markdown", - "id": "6c789efb-b982-48d2-91cd-c555820fccb3", + "id": "c96978bd-9c0b-4fae-b035-4565f9f4ce58", "metadata": {}, "source": [ - "And where will we get these input files? You'll notice that these files do not come with the repository. Instead they are generated when we ran the full NHM model above." + "The model instantiated just fine. While we could just do `submodel.run(finalize=True)`, that'd be too easy. Let's write the expansion of the run loop implemented under the hood of the `Model` class so you can see how you might explore the internals of the a `Model` instance. You can see some basics of the relationship of a `Model` to its `Processes`." ] }, { "cell_type": "code", "execution_count": null, - "id": "1b453372-b218-4936-9046-3b47cf64460b", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, + "id": "2d9410e6-6b64-420f-8940-d743286ce287", + "metadata": {}, "outputs": [], "source": [ - "yaml_output_dir = pl.Path(control.options[\"netcdf_output_dir\"])\n", - "for ii in submodel_file_inputs:\n", - " input_file = yaml_output_dir / f\"{ii}.nc\"\n", - " assert input_file.exists()\n", - " print(input_file)" + "%%time\n", + "submodel.initialize_netcdf()\n", + "for tt in tqdm(range(control.n_times)):\n", + " submodel.control.advance()\n", + " for cls in submodel.process_order:\n", + " submodel.processes[cls].advance()\n", + " submodel.processes[cls].calculate(1.0)\n", + " submodel.processes[cls].output()\n", + "\n", + "submodel.finalize()" ] }, { "cell_type": "markdown", - "id": "e933dcd1-ce14-4113-b829-693e830e9172", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, + "id": "c787a163-c4dd-4826-b0f2-73e1b006c081", + "metadata": {}, "source": [ - "Well, that was a lot of work. But, as alluded to above, the `Model` object does the above so you dont have to. You just learned something about how the flow of information between processes is enabled by the design and how one can query individual processes in `pywatershed`. But we could instantiate the submodel and plot this wiring up, just as we plotted the `ModelGraph` of the full model. 
We'll create the submodel in a new `run_dir` and we'll use outputs from the full model above as inputs to this submodel." + "Well, the submodel saved us some time. Again, about 60% of the original run time (like when reducing the number of output variables). Below, we'll show that the submodel run is identical to the original run, for the processes included. \n", + "\n", + "First, let's lookat the internals of the `submodel in a bit more detail. The final time is still in memory so we can take a closer look at, say, recharge. We'll look at its metadata, its dimensions, shape, type, and dtype in the next cell. " ] }, { "cell_type": "code", "execution_count": null, - "id": "b7ef25e0-7d30-446a-acfd-28af3d6e5ddd", + "id": "6bbf15be-871b-4b58-ad1f-45bfcb37fedc", "metadata": {}, "outputs": [], "source": [ - "run_dir = pl.Path(nb_output_dir / \"nhm_sub\").resolve()\n", - "run_dir.mkdir(exist_ok=True)\n", - "\n", - "\n", - "control_cp = deepcopy(control)\n", - "# It is key that inputs exist from previous full-model run\n", - "control_cp.options[\"input_dir\"] = yaml_output_dir.resolve()\n", - "control_cp.options[\"netcdf_output_dir\"] = run_dir.resolve()\n", - "control_yaml_file = run_dir / \"control.yaml\"\n", - "control_cp.to_yaml(control_yaml_file)\n", - "pprint(control.to_dict(), sort_dicts=False)" + "pprint(pws.meta.find_variables(\"recharge\"))\n", + "print(\n", + " \"PRMSSoilzone dimension names: \",\n", + " submodel.processes[\"soilzone\"].dimensions,\n", + ")\n", + "print(\"nhru: \", submodel.processes[\"soilzone\"].nhru)\n", + "print(\n", + " \"PRMSSoilzone recharge shape: \",\n", + " submodel.processes[\"soilzone\"][\"recharge\"].shape,\n", + ")\n", + "print(\n", + " \"PRMSSoilzone recharge type: \",\n", + " type(submodel.processes[\"soilzone\"][\"recharge\"]),\n", + ")\n", + "print(\n", + " \"PRMSSoilzone recharge dtype: \",\n", + " submodel.processes[\"soilzone\"][\"recharge\"].dtype,\n", + ")" ] }, { "cell_type": "markdown", - "id": "5b81d5f5-e733-4347-b898-5e1a23f36a06", + "id": "af149db1-970c-47d2-b9dd-0d0b21f80810", "metadata": {}, "source": [ - "Now we will use the existing `model_dict` in memory, tayloring to the above and just keeping the processes of interest in the submodel." + "We see the length of the `nhru` dimension and that this is the only dimension on `recharge`. With the exception of the `PRMSSolar` and `PRMSAtmosphere` classes (which vectorizes compuations over time), `Processes` only have spatial dimensions. Their data is written to file with each timestep. Prognostic variables have a `variable_previous` (or `_old` or `_ante`, etc) version to store the antecedent values. One design feature of pywatershed is that all such prognostic variables can be identified in a `Process`'s `.advance()` method. \n", + "\n", + "FOr our current `submodel`, the last timestep is still in memory (even though we've finalized the run) and we can visualize it. The data are on the unstructured/polygon grid of Hydrologic Response Units (HRUs), we'll visualize the spatial distribution at this final time." 
] }, { "cell_type": "code", "execution_count": null, - "id": "0bd108f6-aa1f-4f59-90c5-a8fe6018c90a", + "id": "e0ac5832-7437-467e-901d-c40e2d9ddab3", "metadata": {}, "outputs": [], "source": [ - "model_dict[\"control\"] = str(control_yaml_file)\n", - "model_dict_yaml_file = run_dir / \"model_dict.yaml\"\n", - "keep_procs = [\"prms_soilzone\", \"prms_groundwater\", \"prms_channel\"]\n", - "model_dict[\"model_order\"] = keep_procs\n", - "for kk in list(model_dict.keys()):\n", - " if isinstance(model_dict[kk], dict) and kk not in keep_procs:\n", - " del model_dict[kk]\n", - "\n", - "pprint(model_dict, sort_dicts=False)" + "%%do_not_run_this_cell\n", + "proc_plot = pws.analysis.process_plot.ProcessPlot(gis_files.gis_dir / \"drb_2yr\")\n", + "proc_name = \"soilzone\"\n", + "var_name = \"ssr_to_gw\"\n", + "proc = submodel.processes[proc_name]\n", + "display(proc_plot.plot(var_name, proc))" ] }, { "cell_type": "markdown", - "id": "53fdef0c-d77b-45bd-a6a2-cc227acbfae0", + "id": "13500c68-c363-4f8b-8db0-2c069aa4c5d7", "metadata": {}, "source": [ - "Now we write both the control and model dictionary to yaml files." + "We can easily check the results of our submodel model against our full model. This gives us an opportunity to look at the output files. We can start with recharge as our variable of interest. The model NetCDF output can be read in using `xarray` where we can see all the relevant metadata quickly." ] }, { "cell_type": "code", "execution_count": null, - "id": "4000d71a-3f58-46da-98fa-3c573b861b3c", + "id": "921eb7ef-c6e1-4ecf-b475-59d8552117b3", "metadata": {}, "outputs": [], "source": [ - "with open(model_dict_yaml_file, \"w\") as file:\n", - " _ = yaml.dump(model_dict, file)" - ] - }, - { - "cell_type": "markdown", - "id": "44f889c7-3763-4ed2-b93f-e3d6c2a3f67e", - "metadata": {}, - "source": [ - "And finally we instantiate the submodel from the model dictionary yaml file. " + "var = \"recharge\"\n", + "nhm_da = xr.load_dataarray(run_dir_submodel / f\"{var}.nc\")\n", + "sub_da = xr.load_dataarray(run_dir / f\"{var}.nc\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "fc6570af-c1ae-4d1f-ad7b-1b32427fc647", + "id": "15e7b2e0-c9b6-4100-a328-42add2cbc4a7", "metadata": {}, "outputs": [], "source": [ - "submodel = pws.Model.from_yaml(model_dict_yaml_file)\n", - "submodel" + "display(nhm_da)\n", + "display(sub_da)" ] }, { "cell_type": "markdown", - "id": "22155c95-1de6-45bf-9dea-f562df9fdeaf", + "id": "4f317363-c765-4dda-a972-92f5265abd47", "metadata": {}, "source": [ - "Now to look at the `ModelGraph` for the submodel." + "Now we can compare all output variables common to both runs, asserting that the two runs gave equal output." ] }, { "cell_type": "code", "execution_count": null, - "id": "5cae1a7a-e66e-4816-942a-201edab68b72", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, + "id": "92734cb1-2a69-4d4e-a575-692fef9d5f43", + "metadata": {}, "outputs": [], "source": [ - "show_params = not (platform == \"darwin\" and processor() == \"arm\")\n", - "try:\n", - " pws.analysis.ModelGraph(\n", - " submodel,\n", - " hide_variables=False,\n", - " process_colors=palette,\n", - " show_params=show_params,\n", - " ).SVG(verbose=True, dpi=48)\n", - "except:\n", - " static_url = \"https://github.com/EC-USGS/pywatershed/releases/download/1.1.0/notebook_01_cell_45_submodel_graph.png\"\n", - " print(\n", - " f\"Dot fails on some machines. 
You can see the graph at this url: {static_url}\"\n", - " )\n", - " from IPython.display import Image\n", - "\n", - " display(Image(url=static_url, width=700))" - ] - }, - { - "cell_type": "markdown", - "id": "85f9c997-9937-4207-9ca9-102f2b34f499", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, - "source": [ - "Note that the required inputs to the submodel are quire different and rely on the existence of these files having already been output by the full model. \n", - "\n", - "Now we can initalize output and run the submodel." + "submodel_variables = [\n", + " *pws.PRMSSoilzone.get_variables(),\n", + " *pws.PRMSGroundwater.get_variables(),\n", + " *pws.PRMSChannel.get_variables(),\n", + "]" ] }, { "cell_type": "code", "execution_count": null, - "id": "35002e93-b777-472e-8899-36548c1ea923", + "id": "1bfeb46f-8d29-49f4-8945-f9b443831f04", "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "submodel.run(finalize=True)" + "for var in submodel_variables:\n", + " nhm_da = xr.load_dataarray(run_dir / f\"{var}.nc\")\n", + " sub_da = xr.load_dataarray(run_dir_submodel / f\"{var}.nc\")\n", + " xr.testing.assert_equal(nhm_da, sub_da)" ] }, { "cell_type": "markdown", - "id": "c787a163-c4dd-4826-b0f2-73e1b006c081", + "id": "ec06a8de-c605-409a-b09d-a42674fab66c", "metadata": {}, "source": [ - "We'll, that saved us some time. The run is similar to before, just using fewer processes. \n", - "\n", - "The final time is still in memory. We can take a look at, say, recharge. Before plotting, let's take a look at the data and the metadata for recharge a bit closer." + "We can make some scatter plots and timeseries plots for any variable of interest, since you were not convinced by the `assert_equal` above." ] }, { "cell_type": "code", "execution_count": null, - "id": "6bbf15be-871b-4b58-ad1f-45bfcb37fedc", + "id": "0a7ed710-d23a-4c80-8b28-0a1612a636b6", "metadata": {}, "outputs": [], "source": [ - "pprint(pws.meta.find_variables(\"recharge\"))\n", - "print(\n", - " \"PRMSSoilzone dimension names: \",\n", - " submodel.processes[\"prms_soilzone\"].dimensions,\n", - ")\n", - "print(\"nhru: \", submodel.processes[\"prms_soilzone\"].nhru)\n", - "print(\n", - " \"PRMSSoilzone recharge shape: \",\n", - " submodel.processes[\"prms_soilzone\"][\"recharge\"].shape,\n", + "var_name = \"seg_outflow\"\n", + "nhm_da = xr.load_dataarray(run_dir / f\"{var_name}.nc\")\n", + "sub_da = xr.load_dataarray(run_dir_submodel / f\"{var_name}.nc\")\n", + "scat = xr.merge(\n", + " [nhm_da.rename(f\"{var_name}_yaml\"), sub_da.rename(f\"{var_name}_subset\")]\n", ")\n", - "print(\n", - " \"PRMSSoilzone recharge type: \",\n", - " type(submodel.processes[\"prms_soilzone\"][\"recharge\"]),\n", + "space_dim = sub_da.dims[1]\n", + "display(\n", + " scat.hvplot(\n", + " x=f\"{var_name}_yaml\", y=f\"{var_name}_subset\", groupby=space_dim\n", + " ).opts(data_aspect=1)\n", ")\n", - "print(\n", - " \"PRMSSoilzone recharge dtype: \",\n", - " submodel.processes[\"prms_soilzone\"][\"recharge\"].dtype,\n", - ")" + "\n", + "scat.hvplot(y=f\"{var_name}_subset\", groupby=space_dim)" ] }, { "cell_type": "markdown", - "id": "af149db1-970c-47d2-b9dd-0d0b21f80810", + "id": "daf31415-8cc9-4688-bf28-08c905990433", "metadata": {}, "source": [ - "First we access the metadata on `recharge` and we see its description, dimension, type, and units. The we look at the dimension names of the PRMSSoilzone process in whith it is found. We see the length of the `nhru` dimension and that this is the only dimension on `recharge`. 
We also see that `recharge` is a `numpy.ndarray` with data type `float64`.\n", - "\n", - "So recharge only has spatial dimension. It is written to file with each timestep (or periodically). However, the last timestep is still in memory (even though we've finalized the run) and we can visualize it. The data are on the unstructured/polygon grid of Hydrologic Response Units (HRUs), we'll visualize the spatial distribution at this final time." + "### Adapter class\n", + "The `Adapter` class is the bit of magic behind how we drive `Processes` from files or from other `Processes`. Here we'll give a quick demo of the how this class works. " ] }, { "cell_type": "code", "execution_count": null, - "id": "e0ac5832-7437-467e-901d-c40e2d9ddab3", + "id": "02de8018-f152-459a-8fde-10a8ce3c56e8", "metadata": {}, "outputs": [], "source": [ - "proc_plot = pws.analysis.process_plot.ProcessPlot(\n", - " gis_files.gis_dir / \"drb_2yr\"\n", - ")\n", - "proc_name = \"prms_soilzone\"\n", - "var_name = \"ssr_to_gw\"\n", - "proc = submodel.processes[proc_name]\n", - "display(proc_plot.plot(var_name, proc))" + "control = pws.Control.from_yaml(run_dir_submodel / \"nhm_control.yaml\")\n", + "recharge_adapter = pws.adapter_factory(\n", + " run_dir_submodel / \"recharge.nc\", \"recharge\", control\n", + ")" ] }, { "cell_type": "markdown", - "id": "13500c68-c363-4f8b-8db0-2c069aa4c5d7", - "metadata": {}, - "source": [ - "We can easily check the results of our submodel model against our full model. This gives us an opportunity to look at the output files. We can start with recharge as our variable of interest. The model NetCDF output can be read in using `xarray` where we can see all the relevant metadata quickly." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "921eb7ef-c6e1-4ecf-b475-59d8552117b3", + "id": "f8c2ecdc-ed3b-4fad-a185-63b03cc3228b", "metadata": {}, - "outputs": [], "source": [ - "var = \"recharge\"\n", - "nhm_da = xr.open_dataarray(yaml_output_dir / f\"{var}.nc\")\n", - "sub_da = xr.open_dataarray(run_dir / f\"{var}.nc\")" + "Before the control and the adapter are advanced in time, the adapter has missing values." ] }, { "cell_type": "code", "execution_count": null, - "id": "15e7b2e0-c9b6-4100-a328-42add2cbc4a7", + "id": "55850d5e-d82d-4c1e-94b7-0bf3aabda086", "metadata": {}, "outputs": [], "source": [ - "display(nhm_da)\n", - "display(sub_da)" + "recharge_adapter.current" ] }, { "cell_type": "markdown", - "id": "4f317363-c765-4dda-a972-92f5265abd47", + "id": "ffc8890f-c679-4d9f-856e-a10c5dd347e3", "metadata": {}, "source": [ - "Now we can compare all output variables common to both runs, asserting that the two runs gave equal output." + "We advance through all time and we'll check that we get the values that are still in memory. This demo shows how the adapter class can easily make a NetCDF file look like a `Process`." 
] }, { "cell_type": "code", "execution_count": null, - "id": "1bfeb46f-8d29-49f4-8945-f9b443831f04", + "id": "3fc29fb5-d73f-4fe9-b2b6-76d5079a4db5", "metadata": {}, "outputs": [], "source": [ - "for var in submodel_variables:\n", - " nhm_da = xr.open_dataarray(yaml_output_dir / f\"{var}.nc\")\n", - " sub_da = xr.open_dataarray(run_dir / f\"{var}.nc\")\n", - " xr.testing.assert_equal(nhm_da, sub_da)" + "for tt in range(control.n_times):\n", + " control.advance()\n", + " recharge_adapter.advance()\n", + " if tt == 0:\n", + " display(recharge_adapter.current)" ] }, { "cell_type": "code", "execution_count": null, - "id": "0a7ed710-d23a-4c80-8b28-0a1612a636b6", + "id": "a84140cc-232c-4bca-aa91-0e45524b2561", "metadata": {}, "outputs": [], "source": [ - "# var_name = \"dprst_seep_hru\"\n", - "nhm_da = xr.open_dataarray(yaml_output_dir / f\"{var_name}.nc\")\n", - "sub_da = xr.open_dataarray(run_dir / f\"{var_name}.nc\")\n", - "scat = xr.merge(\n", - " [nhm_da.rename(f\"{var_name}_yaml\"), sub_da.rename(f\"{var_name}_subset\")]\n", - ")\n", - "\n", - "display(\n", - " scat.hvplot(\n", - " x=f\"{var_name}_yaml\", y=f\"{var_name}_subset\", groupby=\"nhm_id\"\n", - " ).opts(data_aspect=1)\n", - ")\n", - "\n", - "scat.hvplot(y=f\"{var_name}_subset\", groupby=\"nhm_id\")" + "all(recharge_adapter.current == submodel.processes[\"soilzone\"][\"recharge\"])" ] }, { diff --git a/exercises/pywatershed/step3_mf6_api.ipynb b/exercises/pywatershed/step3_mf6_api.ipynb new file mode 100644 index 0000000..aa4b03a --- /dev/null +++ b/exercises/pywatershed/step3_mf6_api.ipynb @@ -0,0 +1,876 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "74dbbe8e-fb8a-4d69-86f3-0c41a3a10756", + "metadata": {}, + "source": [ + "# U.S. Geological Survey Class GW3099\n", + "Advanced Modeling of Groundwater Flow (GW3099)\\\n", + "Boise, Idaho\\\n", + "September 16 - 20, 2024\n", + "\n", + "![title](../../images/ClassLocation.jpg)\n", + "\n", + "# Pywatershed MF6 coupling\n", + "\n", + "## The challenge of interdisciplinary modeling\n", + " \n", + "A recurring challenge in the water sciences is working across disciplines. Increasingly, this is where important scientific questions and advances are located. From a software perspective, the peril of interdisciplinary work is that the connecting software can become stale over time. This problem requires intentional software design to make codes interoperable.\n", + "\n", + "One example of this is the case of GSFLOW, the USGS coupled groundwater-surface water flow model (Markstrom et al., 2008). GSFLOW represents a significant advancement in modeling many aspects of combined groundwater and surface water flow. However, over time, the GSFLOW code has diverged from the current and improved versions of both the USGS's hydrologic (Precipitation Runoff Modeling System, PRMS; Regan et al., 2022) and groundwater (MODFLOW, Langevin et al., 2017) models which it initially bridged. \n", + "\n", + "The challenge of working across disciplines in a sustainable way is one issue addressed by the development of the Basic Model Interface (BMI, Hutton et al., 2020). Hughes et al. (2022) implemented BMI for MODFLOW 6 (MF6) and extended it to handle numerical-level model couplings. Hughes et al. (2022) specifically demonstrated the ease of coupling the PRMS model to MF6's BMI interface. \n", + "\n", + "This notebook reproduces the results of Hughes et al. (2022) but substitutes `pywatershed` for [PRMS-BMI](https://github.com/nhm-usgs/bmi-prms-demo) in the Sagehen Creek Watershed Simulation. 
The pywatershed package contains the same core functionality as the PRMS model in an interactive language with a more modularized design. The goal of this notebook is to give a concrete example of how pywatershed can couple to MF6 or to other models via BMI.\n", + "\n", + "## Model coupling design\n", + "\n", + "The model coupling design of Hughes et al. (2022) is similar to GSFLOW, with the following differences:\n", + "* Surface water is modeled on HRUs instead of a grid.\n", + "* The surface water soilzone process is only solved once per timestep instead of being recalculated with MF6 outer solver iterations.\n", + "\n", + "The coupling in this notebook is more like GSFLOW and different from Hughes et al. (2022) in that:\n", + "* Surface runoff and interflow are routed from upslope to downslope HRUs using \"cascading flow\".\n", + "\n", + "Figure 1 below is a GSFLOW schematic from Markstrom et al. (2008). It shows two conceptual soil models interacting in this design, separated by the dashed line. Above the dashed line is the PRMS \"soilzone\" which takes infiltration terms (ssres_infil + pref_flow_infil) from PRMS internally. Vertical downward fluxes out of this PRMS soilzone (ssr_to_gw + soil_to_gw), which normally flow to the PRMS conceptual groundwater model, are instead sent through the API to the unsaturated-zone flow (UZF) package of MF6, which is shown below the PRMS soilzone in the figure. (This flux is not explicit in the figure). Surface and subsurface \"cascading\" or lateral flow terms (hru_horton_cascflow + dunnian_flow + pref_flow + slow_flow, represented by Hortonian runoff, Dunnian runoff, and interflow in the figure) from PRMS are mapped to the streamflow routing (SFR) package of MF6. Finally, evaporation from the saturated root zone is calculated by the UZF package in MF6 and we can pass unused potential evapotranspiration (`unused_potet`) from PRMS through the API to UZF.\n", + "\n", + "|![GSFlow schematic](gsflow_schematic.png)|\n", + "|:--:|\n", + "| Figure 1. GSFLOW conceptual schematic diagram from Markstrom et al. (2008) |\n", + "\n", + "Figure 2 shows additional detail on how pywatershed and MF6 are one-way coupled in this notebook. Pywatershed runs the hydrologic simulation on an unstructured grid of hydrologic response units (HRUs) as shown on the top. Starting from atmospheric forcing inputs, the simulation proceeds through canopy, snow, runoff, and soil process representations, the same as used by GSFLOW except that in this notebook these are on HRUs. At the end of each daily timestep, fluxes and states are sent from pywatershed to MF6 using the MODFLOW 6 API. As shown by arrows in the figure, separate spatial weights map states and fluxes from the HRUs to the MODFLOW grid and streamflow network. The gridded MF6 Unsaturated Zone Flow (UZF) package is conceptualized below the soilzone of the pywatershed model and receives its vertical outflows. Unsatisfied potential evapotranspiration in the hydrologic model, remapped from HRUs to gridcells, can be met by available water in the UZF rootzone. The hydrologic Hortonian, Dunnian and interflow runoff fluxes are mapped from the HRUs into the MF6 streamflow routing (SFR) network.\n", + "\n", + "Also shown in the figure, the model design allows the MF6 Unsaturated Zone Flow (UZF) package to determine saturation and reject some or all of its infiltration term coming from the PRMS soilzone. 
Instead of sending these back in a 2-way coupling, the rejected water is sent as runoff to streamflow routing (SFR) using the \"mover\" (MVR) package. Similarly, the MODFLOW drain (DRN) package was used to simulate groundwater exfiltration to the land surface in areas where the groundwater levels exceed the land surface. This exfiltration is mapped as runoff to SFR via MVR.\n", + "\n", + "| ![sagehen_schematic](sagehen_schematic_pws.png) |\n", + "|:--:|\n", + "| Figure 2. Overview of spatial discretizations, the pywatershed-MF6 coupling, and physical process representations used for modeling the Sagehen Creek Watershed. The execution loop first executes pywatershed (1), then its fluxes are disaggregated with the two mappings and sent to MF6 (2) for execution via its BMI API. |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65ab6d29-d622-4214-86ec-278565a494dc", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pathlib as pl\n", + "import platform\n", + "import shutil\n", + "\n", + "import flopy\n", + "import hvplot.xarray # noqa\n", + "import jupyter_black\n", + "import numpy as np\n", + "import pywatershed as pws\n", + "import xarray as xr\n", + "from modflowapi import ModflowApi\n", + "\n", + "jupyter_black.load()\n", + "pws.utils.gis_files.download()\n", + "pws.utils.addtl_domain_files.download()\n", + "\n", + "nb_output_dir = pl.Path(\"./step3_mf6_api\").resolve()\n", + "if not nb_output_dir.exists():\n", + " nb_output_dir.mkdir()\n", + "\n", + "pws_root = pws.constants.__pywatershed_root__\n", + "domain_dir = (pws_root.parent / \"test_data/sagehen_5yr/\").resolve()\n", + "mf6_domain_dir = (pws_root.parent / \"test_data/sagehen_mf6\").resolve()" + ] + }, + { + "cell_type": "markdown", + "id": "d77c0a90-9d9b-464c-939c-a987b7a9114b", + "metadata": {}, + "source": [ + "## Get the MF6 dylibs or dll from the nightly build\n", + "Keep it fresh." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5cd67cf-c132-4f38-b608-c5d98ff8cfbc", + "metadata": {}, + "outputs": [], + "source": [ + "nightly_build_dir = nb_output_dir / \"mf6_nightly_build\"\n", + "nightly_build_dir.mkdir(exist_ok=True)\n", + "flopy.utils.get_modflow(\n", + " bindir=str(nightly_build_dir), repo=\"modflow6-nightly-build\", quiet=True\n", + ")\n", + "\n", + "if platform.system() == \"Windows\":\n", + " mf6_dll = nightly_build_dir / \"libmf6.dll\"\n", + "elif platform.system() == \"Darwin\":\n", + " mf6_dll = nightly_build_dir / \"libmf6.dylib\"\n", + "else:\n", + " mf6_dll = nightly_build_dir / \"libmf6.so\"\n", + "\n", + "assert mf6_dll.exists()" + ] + }, + { + "cell_type": "markdown", + "id": "057aabc7-0101-4804-8386-3f6b1efb9039", + "metadata": {}, + "source": [ + "## MF6 setup\n", + "The MF6 input files need to be copied to a run directory. One should look at `run_dir/mfsim.nam` (copied from the source `sagehenmodel` directory) to inspect the MF6 model setup in more detail. Looking at `run_dir/ex-gwf-sagehen-gsf.tdis` we can see that the time units are days. The length units of meters are not explicitly stated, but we know that the grid in `common/gwf_sagehen-gsf.dis` is in meters. We can also see that output will be sent to a directory `output` within the run directory. If we don't create it, the dylib/DLL will fail when it can't write output to that location. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db740d2a-8543-4f59-9b12-1d39270fb42f", + "metadata": {}, + "outputs": [], + "source": [ + "files_dirs_to_cp = [\n", + " \"common\",\n", + " \"sagehenmodel\",\n", + " \"hru_weights.npz\",\n", + " \"sagehen_postprocess_graphs.py\",\n", + " \"prms_grid_v3-Copy1.nc\",\n", + "]\n", + "rename = {\n", + " \"sagehenmodel\": \"run_dir\",\n", + " \"sagehen_postprocess_graphs.py\": \"run_dir/sagehen_postprocess_graphs.py\",\n", + "}\n", + "for name in files_dirs_to_cp:\n", + " src = mf6_domain_dir / name\n", + " if name in rename.keys():\n", + " dst = nb_output_dir / rename[name]\n", + " else:\n", + " dst = nb_output_dir / name\n", + " if not dst.exists():\n", + " if src.is_dir():\n", + " shutil.copytree(src, dst)\n", + " else:\n", + " shutil.copy(src, dst)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d825ab14-0425-488e-8cd6-e4bcebfe25cd", + "metadata": {}, + "outputs": [], + "source": [ + "run_dir = nb_output_dir / \"run_dir\"\n", + "assert run_dir.exists()\n", + "\n", + "mf6_output_dir = run_dir / \"output\"\n", + "# This step is *critical* for MF6 initialization\n", + "if not mf6_output_dir.exists():\n", + " mf6_output_dir.mkdir()" + ] + }, + { + "cell_type": "markdown", + "id": "72122c0c-e68e-4145-8812-fff8f1b63619", + "metadata": {}, + "source": [ + "## Pywatershed setup\n", + "The pyawatershed files dont need copied, they are just used in memory. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5eacd8f9-7290-48e9-a9ba-9bf582ed8e4e", + "metadata": {}, + "outputs": [], + "source": [ + "control_file = domain_dir / \"sagehen_no_gw_cascades.control\"\n", + "control = pws.Control.load_prms(control_file)\n", + "\n", + "parameter_file = domain_dir / control.options[\"parameter_file\"]\n", + "params = pws.parameters.PrmsParameters.load(parameter_file)\n", + "params = pws.utils.preprocess_cascades.preprocess_cascade_params(\n", + " control, params, verbosity=0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8655dd6b-e3de-4836-bb2f-bac3e4e240ca", + "metadata": {}, + "source": [ + "We'll do some customization of the control" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dde017ba-ceea-4d19-8e04-d3831d775760", + "metadata": {}, + "outputs": [], + "source": [ + "# use the 16 year CBH files from supplementary pywatershed files\n", + "control.options[\"input_dir\"] = (\n", + " pws.constants.__pywatershed_root__\n", + " / \"data/pywatershed_addtl_domains/sagehen_cbh_16yr\"\n", + ")\n", + "control.edit_n_time_steps(5844) # the 16 years in the forcing files above\n", + "control.options[\"calc_method\"] = \"numba\"\n", + "# Runoff and Soil cascades fail mass balance at some low level though their\n", + "# outputs match PRMS, so we'll turn off the budget for now.\n", + "control.options[\"budget_type\"] = None\n", + "control.options[\"netcdf_output_dir\"] = nb_output_dir / \"pws_model_output\"" + ] + }, + { + "cell_type": "markdown", + "id": "b8d3e34d-eb4f-41d0-a70a-2d4f327eb9e9", + "metadata": {}, + "source": [ + "We discussed the coupling strategy above, but now it's time to get specific with pywatershed. Fortunately, we can ask pywatershed for details about its fluxes and states. For fluxes leaving the model, we can look specifically at the \"output\" mass balance terms. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47f28ecd-28e0-480d-8f32-eccb71ab146c", + "metadata": {}, + "outputs": [], + "source": [ + "pws.meta.get_vars(\n", + " pws.PRMSRunoffCascadesNoDprst.get_mass_budget_terms()[\"outputs\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d97ca6e9-700c-4e71-8c0c-c241763d5d02", + "metadata": {}, + "source": [ + "From the `PRMSRunoffCascadesNoDprst` process, the `hru_horton_cascflow` variable is one component of flux we want to map to SFR in MF6. Looking at `PRMSSoilzoneCascadesNoDprst` next, " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a508f36-884c-4519-a00d-cfe65ee117a9", + "metadata": {}, + "outputs": [], + "source": [ + "pws.meta.get_vars(\n", + " pws.PRMSSoilzoneCascadesNoDprst.get_mass_budget_terms()[\"outputs\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "32f691fe-44b6-41e4-bd89-9709749e5994", + "metadata": {}, + "source": [ + "To SFR, we'll also want to add `dunnian_flow`, `pref_flow`, and `slow_flow`. Note that all these fluxes to SFR are reported by PRMS in inches on an HRU per day. We'll need to convert these depths into volumes in cubic meters for SFR. This will require the area of the HRUs from which these fluxes come. \n", + "\n", + "Similarly, the vertical fluxes from `PRMSSoilzoneCascadesNoDprst` to UZF, `soil_to_gw` and `ssr_to_gw`, are in inches on an HRU per day and we'll have to make the same unit conversion to volume for MF6 inputs. Finally, let's look at `unused_potet`. We'll verify that it's a varible in `PRMSSoilzoneCascadesNoDprst`, which is the last process in pywatershed to take its toll on potential evapotranspiration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79d41315-5fe9-450d-988d-9a330a47fb43", + "metadata": {}, + "outputs": [], + "source": [ + "assert \"unused_potet\" in pws.PRMSSoilzoneCascadesNoDprst.get_variables()\n", + "pws.meta.get_vars(\"unused_potet\")" + ] + }, + { + "cell_type": "markdown", + "id": "b5f13526-50ef-4c7c-a1a6-3d53490116d3", + "metadata": {}, + "source": [ + "Again, the state of this variable is in inches on an HRU and will need the same conversion as the other variables.\n", + "\n", + "We specify the variable names we'd like to be output by the pywatershed model run. We make this output optional with the `pws_write_output` variable. Below, we'll actually write our own custom output routine for pywatershed to see how easy that is." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28c6d9f5-066b-4380-9169-f470943abf1b", + "metadata": {}, + "outputs": [], + "source": [ + "pws_write_output = False\n", + "output_var_names = [\n", + " \"hru_ppt\",\n", + " \"potet\",\n", + " \"hru_actet\",\n", + " \"ssres_in\",\n", + " \"pref_flow_infil\",\n", + " \"ssr_to_gw\",\n", + " \"soil_to_gw\",\n", + " \"sroff\",\n", + " \"ssres_flow\",\n", + " \"pref_flow\",\n", + " \"dunnian_flow\",\n", + " \"slow_flow\",\n", + " \"hru_horton_cascflow\",\n", + "]\n", + "control.options[\"netcdf_output_var_names\"] = output_var_names" + ] + }, + { + "cell_type": "markdown", + "id": "a6c1b085-e8bf-4cf4-a1f4-7fe905b2691e", + "metadata": {}, + "source": [ + "We can specify and initialize the model here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72f0d736-8435-4ce2-a97b-6ee3f2995c4e", + "metadata": {}, + "outputs": [], + "source": [ + "prms_processes = [\n", + " pws.PRMSSolarGeometry,\n", + " pws.PRMSAtmosphere,\n", + " pws.PRMSCanopy,\n", + " pws.PRMSSnow,\n", + " pws.PRMSRunoffCascadesNoDprst,\n", + " pws.PRMSSoilzoneCascadesNoDprst,\n", + "]\n", + "\n", + "prms_model = pws.Model(\n", + " prms_processes,\n", + " control=control,\n", + " parameters=params,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "aa73ab2a-1164-4b84-8b2c-e8a3c54debc3", + "metadata": {}, + "source": [ + "## Spatial mappings\n", + "These mappings from HRUs to the MF6 grid and to the SFR network were created in advance (their construction is beyond our scope here); we simply load them from file. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7dcd46a9-11af-414f-abbc-2362f5c696c0", + "metadata": {}, + "outputs": [], + "source": [ + "weights = np.load(nb_output_dir / \"hru_weights.npz\")" + ] + }, + { + "cell_type": "markdown", + "id": "0fb6f88b-c225-44da-9791-4dc3e9a6768c", + "metadata": {}, + "source": [ + "_HRU to UZF weights_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f101ff73-0434-4c51-aa84-5e7041423364", + "metadata": {}, + "outputs": [], + "source": [ + "uzfw = weights[\"uzfw\"]\n", + "nuzf_infilt = uzfw.shape[0]\n", + "uzfw.shape" + ] + }, + { + "cell_type": "markdown", + "id": "67485a18-371d-43aa-95fb-fb30fb36dd8d", + "metadata": {}, + "source": [ + "_HRU to SFR weights_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9ea6588-9cbd-4e7c-b301-86c2f3c4446a", + "metadata": {}, + "outputs": [], + "source": [ + "sfrw = weights[\"sfrw\"]\n", + "sfrw.shape" + ] + }, + { + "cell_type": "markdown", + "id": "065f4cef-321b-4ee9-920f-c1575975f3a5", + "metadata": {}, + "source": [ + "This indicates that there are 128 HRUs, 3386 gridcells, and 201 stream reaches. " + ] + }, + { + "cell_type": "markdown", + "id": "2936b2ae-d939-4138-a1f8-e3081b9a80d7", + "metadata": {}, + "source": [ + "Define a function to map HRU values to MODFLOW 6 values, which is just a dot product." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0e98ae6-c950-4819-ae02-4afc4062c1de", + "metadata": {}, + "outputs": [], + "source": [ + "def hru2mf6(weights, values):\n", + " return weights.dot(values)" + ] + }, + { + "cell_type": "markdown", + "id": "121152b8-ef53-4743-8ca2-d57ed9ceb875", + "metadata": {}, + "source": [ + "## Unit conversion factors\n", + "As noted above, we'll need to convert from inches on an HRU to cubic meters. The HRU areas also happen to be in acres! This is FUN."
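As a quick numeric check of the conversion just described (a self-contained example with toy values; the factors match those defined in the next cell), one inch of water over a one-acre HRU is roughly 103 cubic meters.

```python
# Quick check of the inches-on-an-HRU -> cubic-meters conversion
# (toy values; the factors match those defined in the next cell).
m2ft = 3.28081
in2m = 1.0 / (12.0 * m2ft)  # inches -> meters (~0.0254)
acre2m2 = 43560.0 / (m2ft * m2ft)  # acres -> square meters (~4046.9)

depth_in = 1.0  # one inch of water ...
area_acres = 1.0  # ... over a one-acre HRU
volume_m3 = depth_in * in2m * area_acres * acre2m2
print(f"{volume_m3:.1f} m^3")  # ~102.8 m^3
```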
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b78d9368-e2fa-4278-afa6-8d77746eb7bf", + "metadata": {}, + "outputs": [], + "source": [ + "m2ft = 3.28081\n", + "in2m = 1.0 / (12.0 * m2ft)\n", + "acre2m2 = 43560.0 / (m2ft * m2ft)\n", + "hru_area_m2 = params.parameters[\"hru_area\"] * acre2m2" + ] + }, + { + "cell_type": "markdown", + "id": "d13a52d5-431a-4f3c-9ff7-81079e7f5279", + "metadata": {}, + "source": [ + "## Run one-way coupled pywatershed and MODFLOW 6 models\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "484c1628-6866-412b-83a8-5ec88b1b71b5", + "metadata": {}, + "outputs": [], + "source": [ + "os.chdir(run_dir)\n", + "print(\"changing to run directory:\\n\", os.getcwd())\n", + "ntimes = int(control.n_times)\n", + "print(\"Number of days to simulate: {}\".format(ntimes))" + ] + }, + { + "cell_type": "markdown", + "id": "70498d06-a3b2-4d3b-a878-739efb80eba5", + "metadata": {}, + "source": [ + "### Custom pywatershed output for figures\n", + "While pywatershed has NetCDF output available, the existing plots are set up to work with data collected during the run and saved to NetCDF at the end of the run. We follow this path for ease of reproducing the plots, and it also illustrates how easily custom output can be achieved with pywatershed. Custom output that accumulates data in memory and only writes after the simulation, as done below, can significantly decrease pywatershed run times, since writing to NetCDF during the model run loop incurs a wall-time cost for output to disk. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1815eb83-a793-43f6-b060-e4c6510ec869", + "metadata": {}, + "outputs": [], + "source": [ + "# We'll store a dictionary of numpy arrays and convert to netcdf via\n", + "# xarray on output.\n", + "prms_vars = [\n", + " \"hru_ppt_out\",\n", + " \"actet_out\",\n", + " \"unused_potet_out\",\n", + " \"prms_infil_out\",\n", + " \"runoff_out\",\n", + " \"interflow_out\",\n", + "]\n", + "prms_var_dict = {}\n", + "prms_var_dict[\"time_out\"] = np.empty(ntimes, dtype=\"datetime64[s]\")\n", + "for vv in prms_vars:\n", + " prms_var_dict[vv] = np.zeros(\n", + " (ntimes, hru_area_m2.shape[0]), dtype=np.float64\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "b15e6204-70bf-432d-809f-b19796814153", + "metadata": {}, + "source": [ + "### Initialize MODFLOW 6\n", + "\n", + "The dylib/DLL initialization requires all inputs AND the output directory to exist."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a87149e-0c94-4bf2-b2aa-cf03348fa8f1", + "metadata": {}, + "outputs": [], + "source": [ + "mf6_config_file = \"mfsim.nam\"\n", + "mf6 = ModflowApi(mf6_dll, working_directory=os.getcwd())\n", + "mf6.initialize(str(mf6_config_file))" + ] + }, + { + "cell_type": "markdown", + "id": "aad59364-b785-4b88-bc87-e74585e6ce5d", + "metadata": {}, + "source": [ + "Get information about the MODFLOW model start and end times" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f403a084-381c-4956-9f2d-ba6c070d1dbd", + "metadata": {}, + "outputs": [], + "source": [ + "current_time = mf6.get_current_time()\n", + "end_time = mf6.get_end_time()\n", + "print(\n", + " f\"MF current_time: {current_time}, prms control.start_time: {control.start_time}\"\n", + ")\n", + "print(f\"MF end_time: {end_time}, prms control.n_times: {control.n_times}\")" + ] + }, + { + "cell_type": "markdown", + "id": "5f78db76-2ccb-45cf-b636-f6fc3c2b33c3", + "metadata": {}, + "source": [ + "### Get pointers to MODFLOW 6 variables\n", + "\n", + "As shown in Figures 1 and 2, pywatershed is sending the following fluxes to MF6:\n", + "\n", + "* SINF is surface infiltration to MF6's UZF package; this is mapped from groundwater recharge in pywatershed.\n", + "* PET is unsatisfied potential evapotranspiration in MF6's UZF; this is also calculated in the soilzone in pywatershed.\n", + "* RUNOFF to MF6's SFR comes from combined surface and subsurface runoff from pywatershed.\n", + "\n", + "To set these values in MF6, we need to get pointers into MF6 using its BMI interface." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8518c7f8-17eb-4856-a662-fbe2a7109c02", + "metadata": {}, + "outputs": [], + "source": [ + "sim_name = \"sagehenmodel\"\n", + "mf6_var_model_dict = {\"SINF\": \"UZF-1\", \"PET\": \"UZF-1\", \"RUNOFF\": \"SFR-1\"}\n", + "mf6_vars = {}\n", + "for vv, mm in mf6_var_model_dict.items():\n", + " mf6_vars[vv] = mf6.get_value_ptr(\n", + " mf6.get_var_address(vv, sim_name.upper(), mm)\n", + " )\n", + "\n", + "for vv, dd in mf6_vars.items():\n", + " print(f\"shape of {vv}: {dd.shape}\")" + ] + }, + { + "cell_type": "markdown", + "id": "5276591a-df87-4d23-865d-7675a3bc92f4", + "metadata": {}, + "source": [ + "The UZF model has 2 vertical layers, so the number of points in UZF is twice what was shown in the spatial weights mappings above. The first layer is first in the linear dimension." + ] + }, + { + "cell_type": "markdown", + "id": "468c8848-56f9-4996-affc-78c85265e156", + "metadata": {}, + "source": [ + "### Run the models\n", + "\n", + "Now we run the models. We use pywatershed to control the simulation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bad87670-58e0-45f9-8468-31a04b3bc659", + "metadata": {}, + "outputs": [], + "source": [ + "if pws_write_output:\n", + " prms_model.initialize_netcdf()\n", + "\n", + "n_time_steps = control.n_times # same as ntimes above\n", + "for istep in range(n_time_steps):\n", + " prms_model.advance()\n", + "\n", + " if control.current_dowy == 1:\n", + " if istep > 0:\n", + " print(\"\\n\")\n", + " print(f\"Water year: {control.current_year + 1}\")\n", + "\n", + " msg = f\"Day of water year: {str(control.current_dowy).zfill(3)}\"\n", + " print(msg, end=\"\\r\")\n", + "\n", + " # run pywatershed\n", + " prms_model.calculate()\n", + " prms_model.output() # if output is not initialized, does nothing\n", + "\n", + " # calculate variables for output and coupling. 
Some variables are output just for\n", + " # diagnostic purposes\n", + " hru_ppt = prms_model.processes[\"PRMSAtmosphere\"].hru_ppt.current\n", + " prms_infil = (\n", + " prms_model.processes[\"PRMSSoilzoneCascadesNoDprst\"].ssres_in\n", + " + prms_model.processes[\"PRMSSoilzoneCascadesNoDprst\"].pref_flow_infil\n", + " )\n", + " actet = prms_model.processes[\"PRMSSoilzoneCascadesNoDprst\"].hru_actet\n", + "\n", + " # coupling variables\n", + " unused_potet = prms_model.processes[\n", + " \"PRMSSoilzoneCascadesNoDprst\"\n", + " ].unused_potet\n", + "\n", + " uzf_recharge = (\n", + " prms_model.processes[\"PRMSSoilzoneCascadesNoDprst\"].ssr_to_gw\n", + " + prms_model.processes[\"PRMSSoilzoneCascadesNoDprst\"].soil_to_gw\n", + " )\n", + "\n", + " hortonian = prms_model.processes[\n", + " \"PRMSRunoffCascadesNoDprst\"\n", + " ].hru_horton_cascflow\n", + " dunnian = prms_model.processes[\"PRMSSoilzoneCascadesNoDprst\"].dunnian_flow\n", + " pref_flow = prms_model.processes[\"PRMSSoilzoneCascadesNoDprst\"].pref_flow\n", + " slow_flow = prms_model.processes[\"PRMSSoilzoneCascadesNoDprst\"].slow_flow\n", + " prms_ro = (\n", + " (hortonian + dunnian + pref_flow + slow_flow) * in2m * hru_area_m2\n", + " )\n", + "\n", + " # save PRMS results (converted to m3(/d))\n", + " prms_var_dict[\"time_out\"][istep] = control.current_time\n", + " prms_var_dict[\"hru_ppt_out\"][istep, :] = hru_ppt * in2m * hru_area_m2\n", + " prms_var_dict[\"unused_potet_out\"][istep, :] = (\n", + " unused_potet * in2m * hru_area_m2\n", + " )\n", + " prms_var_dict[\"actet_out\"][istep, :] = actet * in2m * hru_area_m2\n", + " prms_var_dict[\"prms_infil_out\"][istep, :] = prms_infil * in2m * hru_area_m2\n", + " prms_var_dict[\"runoff_out\"][istep, :] = (\n", + " (hortonian + dunnian) * in2m * hru_area_m2\n", + " )\n", + " prms_var_dict[\"interflow_out\"][istep, :] = (\n", + " (pref_flow + slow_flow) * in2m * hru_area_m2\n", + " )\n", + "\n", + " # Set MF6 values in place: these arrays are live pointers into MF6 memory\n", + " mf6_vars[\"RUNOFF\"][:] = hru2mf6(sfrw, prms_ro) # sroff + ssres_flow\n", + " mf6_vars[\"SINF\"][:nuzf_infilt] = (\n", + " hru2mf6(uzfw, uzf_recharge) * in2m\n", + " ) # ssr_to_gw + soil_to_gw\n", + " mf6_vars[\"PET\"][:nuzf_infilt] = hru2mf6(uzfw, unused_potet) * in2m\n", + "\n", + " # run MODFLOW 6\n", + " mf6.update()\n", + "\n", + "try:\n", + " mf6.finalize()\n", + " prms_model.finalize()\n", + " success = True\n", + "except Exception as e:\n", + " raise RuntimeError(\"Model finalization failed\") from e" + ] + }, + { + "cell_type": "markdown", + "id": "c9584e62-d72a-48f0-9286-e69b66d0257b", + "metadata": {}, + "source": [ + "### Save custom pywatershed output to NetCDF\n", + "Variables collected in memory from pywatershed are converted to NetCDF via xarray data sets. We choose to write one file per variable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4280eb0-fc4f-475c-a315-6808a4e622a0", + "metadata": {}, + "outputs": [], + "source": [ + "pws_output_dir = nb_output_dir / \"pws_output\"\n", + "pws_output_dir.mkdir(exist_ok=True)\n", + "for prms_var in prms_vars:\n", + " if prms_var == \"time_out\":\n", + " continue\n", + " var = prms_var[0:-4]\n", + " print(var)\n", + " ds = xr.Dataset(\n", + " data_vars={var: ([\"time\", \"nhru\"], prms_var_dict[prms_var])},\n", + " coords={\"time\": prms_var_dict[\"time_out\"].astype(\"datetime64[ns]\")},\n", + " )\n", + " ds.to_netcdf(pws_output_dir / f\"{var}.nc\")\n", + " del ds" + ] + }, + { + "cell_type": "markdown", + "id": "c5dddfe4-570c-40bd-ad7b-b5dc6c0ab983", + "metadata": {}, + "source": [ + "## Plot\n", + "\n", + "We spare the user the details of the plotting. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da285185-4535-4e04-a144-2f3bc362130e", + "metadata": {}, + "outputs": [], + "source": [ + "fig_out_dir = nb_output_dir / \"figures\"\n", + "fig_out_dir.mkdir(exist_ok=True)\n", + "\n", + "run_dir = nb_output_dir / \"run_dir\"\n", + "os.chdir(run_dir) # make sure\n", + "\n", + "import sagehen_postprocess_graphs as graphs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10adb506-c0f4-440d-9996-b2dd12b65ffc", + "metadata": {}, + "outputs": [], + "source": [ + "graphs.streamflow_fig()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aefe7226-2a20-402a-beed-28fcb452e111", + "metadata": {}, + "outputs": [], + "source": [ + "graphs.et_recharge_ppt_fig()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c67ee984-324e-4e48-ae8a-b83c4a58094e", + "metadata": {}, + "outputs": [], + "source": [ + "graphs.gwf_uzf_storage_changes_fig()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdcb23dd-d9e5-4a82-9a0f-80d10760e4a2", + "metadata": {}, + "outputs": [], + "source": [ + "graphs.cumulative_streamflow_fig()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6222f67-a8c6-4467-8f3b-a473c6de490d", + "metadata": {}, + "outputs": [], + "source": [ + "graphs.composite_fig()" + ] + }, + { + "cell_type": "markdown", + "id": "30bb07b5-4895-4c77-ad3a-1d878a4c245b", + "metadata": {}, + "source": [ + "Compare our plots side-by-side, very scientific! Remember that we are not using the exact same model coupling design: we have introduced cascading flow to the model used by Hughes et al. (2022). So the results won't match, but they should be close. \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Our resultsHughes et al. (2022), Figure 12
\"Our\"Hughes
\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "id": "a18a1dbf-d942-4d68-9308-c5eef2fbde55", + "metadata": {}, + "source": [ + "## Conclusions\n", + "\n", + "This one-way coupling of pywatershed and MODFLOW 6 demonstrates how software interoperatbility standards can bridge hydrologic models from \"different\" domains. \n", + "\n", + "\n", + "Pywatershed is close to reproducing GSFLOW, with \"cascading flows\" and 2-way couling with MF6 to be available in version 3.0." + ] + }, + { + "cell_type": "markdown", + "id": "57f787ba-e317-4ef7-b9f8-94432ec5a161", + "metadata": {}, + "source": [ + "## References\n", + "* Hutton, E. W., Piper, M. D., & Tucker, G. E. (2020). The Basic Model Interface 2.0: A standard interface for coupling numerical models in the geosciences. Journal of Open Source Software, 5(51), 2317.\n", + "* Hughes, J. D., Russcher, M. J., Langevin, C. D., Morway, E. D., & McDonald, R. R. (2022). The MODFLOW Application Programming Interface for simulation control and software interoperability. Environmental Modelling & Software, 148, 105257.\n", + "* Langevin, C. D., Hughes, J. D., Banta, E. R., Niswonger, R. G., Panday, S., & Provost, A. M. (2017). Documentation for the MODFLOW 6 groundwater flow model (No. 6-A55). US Geological Survey.\n", + "* Markstrom, S. L., Niswonger, R. G., Regan, R. S., Prudic, D. E., & Barlow, P. M. (2008). GSFLOW-Coupled Ground-water and Surface-water FLOW model based on the integration of the Precipitation-Runoff Modeling System (PRMS) and the Modular Ground-Water Flow Model (MODFLOW-2005). US Geological Survey techniques and methods, 6, 240.\n", + "* Regan, R. S., Markstrom, S. L., Hay, L. E., Viger, R. J., Norton, P. A., Driscoll, J. M., & LaFontaine, J. H. (2018). Description of the national hydrologic model for use with the precipitation-runoff modeling system (prms) (No. 6-B9). US Geological Survey.\n", + "* Regan, R.S., Markstrom, S.L., LaFontaine, J.H., 2022, PRMS version 5.2.1: Precipitation-Runoff Modeling System (PRMS): U.S. Geological Survey Software Release, 02/10/2022." + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython" + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/exercises/pywatershed/step4_flow_graph.ipynb b/exercises/pywatershed/step4_flow_graph.ipynb new file mode 100644 index 0000000..fadd633 --- /dev/null +++ b/exercises/pywatershed/step4_flow_graph.ipynb @@ -0,0 +1,797 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "66cf7f2b-ca2e-40b0-81e9-6cf8bfd0e2eb", + "metadata": {}, + "source": [ + "# PRMSChannel FlowGraph with a STARFIT Reservoir: Big Sandy Reservoir\n", + "\n", + "This notebook demonstrates the capabilities of the `FlowGraph` class and its associated classes `FlowNode` and `FlowNodeMaker` in a real-world example. \n", + "\n", + "\"What is `FlowGraph`?\", you ask. The `FlowGraph` concept solves the problem of allowing users to combine multiple kinds of flow solutions within a network or \"graph\" of 1-D, explict flow. Have you ever had a streamflow network and just wish you could add a reservoir here or there? Did you ever wish you could make model flows match observations where they are available or maybe you would have liked to specify diversions a a few key points? 
\n", + "\n", + "The FlowGraph capabilities in pywatershed 2.0 allow users to create their own networks or “graphs” combining different explicit flow solutions on each node. There are many applications, but a simple, common one is to add a reservoir representation into an existing graph of flow, as shown schematically in the figure below.\n", + "\n", + "![FlowGraph is cool](pywatershed_repo/doc/_static/flow_graph_schematic_1.png)\n", + "\n", + "\n", + "FlowGraph can represent arbitrarily complex, user-defined combinations of flow solutions or “FlowNodes”. The core concept is that a FlowGraph manages and computes FlowNodes provided by FlowNodeMakers. This relationship is illustrated in the figure below. Note that the FlowNodes on the right side are “classes” or “kinds”. On the left, inside the FlowGraph, there are individual FlowNode instances, complete with data provided to each by the FlowNodeMaker upon FlowNode instantiation. FlowNodes instances calculate explicit solutions of flow on asingle spatial unit using the data at that location.With version 2.0, these FlowNode classes are available: PRMSChannelFlowNode, STARFITFlowNode, ObsInNode, PassThroughNode. PRMSChannel and STARFIT are discussed in the panel to the right.\n", + "\n", + "![FlowGraph is cooler](pywatershed_repo/doc/_static/flow_graph_schematic_2.png)\n", + "\n", + "Thie real-world example in this notebook starts from an existing flow graph which is in `PRMSChannel` and adds in a single new node to represent a reservoir within the `PRMSChannel` simulation. The `FlowGraph` is the class which is able to take different\n", + "flow methods and combine them in user-specified ways. In this case we combine nodes of class\n", + "`PRMSChannelFlowNode` with one node of class `StarfitFlowNode`. \n", + "\n", + "Please see these links to the documentation for more details on \n", + "[`FlowGraph`](https://pywatershed.readthedocs.io/en/latest/api/generated/pywatershed.FlowGraph.html), \n", + "[`StarfitFlowNode`](https://pywatershed.readthedocs.io/en/latest/api/generated/pywatershed.StarfitFlowNode.html), and \n", + "[`PRMSChannelFlowNode`](https://pywatershed.readthedocs.io/en/latest/api/generated/pywatershed.PRMSChannelFlowNode.html)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42cab067-be80-4743-b09a-410c215abfcb", + "metadata": {}, + "outputs": [], + "source": [ + "import pathlib as pl\n", + "from pprint import pprint\n", + "\n", + "import hvplot.pandas # noqa, after pandas\n", + "import hvplot.xarray # noqa, after xr\n", + "import jupyter_black\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pyPRMS\n", + "import pywatershed as pws\n", + "import xarray as xr\n", + "from pywatershed.constants import zero\n", + "from pywatershed.plot import DomainPlot\n", + "from tqdm.auto import tqdm\n", + "\n", + "ndays_run = 365 * 2\n", + "plot_height = 600\n", + "plot_width = 1000\n", + "\n", + "jupyter_black.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce9478cf-9126-4bbe-993e-a2cb6d04001b", + "metadata": {}, + "outputs": [], + "source": [ + "pws.utils.addtl_domain_files.download()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ac724e3-5d9c-4e8e-9334-2f7f0be643b5", + "metadata": {}, + "outputs": [], + "source": [ + "nb_output_dir = pl.Path(\"./step4_flow_graph\")\n", + "if not nb_output_dir.exists():\n", + " nb_output_dir.mkdir()" + ] + }, + { + "cell_type": "markdown", + "id": "314d5271-3976-430b-a754-b7a72e336704", + "metadata": {}, + "source": [ + "## The Big Sandy Dike and the Flaming Gorge Domain\n", + "Let's get to know something about this domain and reservoir. We'll load the full Global Reservoir and Dam (GRanD) data set and pull out the row for Big Sandy. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92ec574d-111e-4637-8389-9bb777a7f4db", + "metadata": {}, + "outputs": [], + "source": [ + "pkg_root = pws.constants.__pywatershed_root__\n", + "big_sandy_param_file = pkg_root / \"data/big_sandy_starfit_parameters.nc\"\n", + "sf_params = pws.Parameters.from_netcdf(big_sandy_param_file, use_xr=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35a7f96d-bb4d-4631-a77b-f20ae11912c0", + "metadata": {}, + "outputs": [], + "source": [ + "# Take a look in pandas format\n", + "sf_params.to_xr_ds().to_pandas()" + ] + }, + { + "cell_type": "markdown", + "id": "19f46834-677f-4f3f-9c76-a53034e8b65e", + "metadata": {}, + "source": [ + "The reservoir capacity is 67.1 million cubic meters (MCM). Let's find it on a map... " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90d3878e-0308-413f-a1d1-36a8f64956d8", + "metadata": {}, + "outputs": [], + "source": [ + "start_lat = sf_params.parameters[\"LAT_DD\"]\n", + "start_lon = sf_params.parameters[\"LONG_DD\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10004a14-785a-43f4-abc6-5102156d219e", + "metadata": {}, + "outputs": [], + "source": [ + "domain_dir = pkg_root / \"data/pywatershed_addtl_domains/fgr_2yr\"\n", + "domain_gis_dir = pkg_root / \"data/pywatershed_gis/fgr_2yr\"\n", + "\n", + "control_file = domain_dir / \"nhm.control\"\n", + "\n", + "shp_file_hru = domain_gis_dir / \"model_nhru.shp\"\n", + "shp_file_seg = domain_gis_dir / \"model_nsegment.shp\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19533c6d-35b4-4017-9d03-a8992ca3f876", + "metadata": {}, + "outputs": [], + "source": [ + "# add GRanD shp file? or add to the object afterwards? option to get polygons\n", + "# for sf_params above? 
but how to show connectivity?\n", + "DomainPlot(\n", + " hru_shp_file=shp_file_hru,\n", + " segment_shp_file=shp_file_seg,\n", + " hru_parameters=domain_dir / \"parameters_dis_hru.nc\",\n", + " hru_parameter_names=[\n", + " \"nhm_id\",\n", + " \"hru_lat\",\n", + " \"hru_lon\",\n", + " \"hru_area\",\n", + " ],\n", + " segment_parameters=domain_dir / \"parameters_dis_seg.nc\",\n", + " segment_parameter_names=[\n", + " \"nhm_seg\",\n", + " \"tosegment\",\n", + " \"seg_length\",\n", + " \"seg_slope\",\n", + " \"seg_cum_area\",\n", + " ],\n", + " start_lat=start_lat,\n", + " start_lon=start_lon,\n", + " start_zoom=13,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1170d616-a760-455d-a88f-19ffc2205121", + "metadata": {}, + "source": [ + "From the above, by mousing over the segments we can see the reservoir should be inserted above nhm_seg 44426 and below nhm_segs 44434 and 44435. \n", + "\n", + "For more context, zooming out shows the full Flaming Gorge Domain on the Green River. The OpenStreetMap layer shows that Big Sandy Dike is located near Farson, WY. In the EsriSatellite layer, we observe this is a very dry, high-plains region with farming downstream of the Big Sandy and Eden reservoirs around Farson. We can also see that the reservoir is fed by snowpack and seasonal runoff from the high Wind River Range to the Northeast. The photo of Arrowhead Lake below (taken by the author in August 2023) looks southeast at Temple Mountain, across the furthest upstream HRU (nhm id 86519) of the Big Sandy Dike. \n", + "![Arrowhead Lake, August 2023](pywatershed_repo/examples/static/arrowhead_lake.jpg)" + ] + }, + { + "cell_type": "markdown", + "id": "c98ccde5-0a85-491e-911c-4045aa68aa12", + "metadata": {}, + "source": [ + "## NHM Run on Flaming Gorge Domain: NO Big Sandy\n", + "The NHM does not represent any reservoirs. From above, we'll assume the outflows of Big Sandy are on segment 44426. We'll see how the NHM represents flow at Big Sandy.\n", + "We can run pywatershed using the \"legacy instantiation\" as described in Notebook 02."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b64dd07-cc07-4175-9f49-dffd532f7e81", + "metadata": {}, + "outputs": [], + "source": [ + "control = pws.Control.load_prms(control_file, warn_unused_options=False)\n", + "control.edit_n_time_steps(ndays_run)\n", + "parameter_file = domain_dir / control.options[\"parameter_file\"]\n", + "params = pws.parameters.PrmsParameters.load(parameter_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82ba984f-3402-416d-8800-0bee97158219", + "metadata": {}, + "outputs": [], + "source": [ + "# domain_dir = pl.Path(\"/Users/jmccreight/usgs/data/pynhm/fgr_2yr\")\n", + "# # run just once to convert CBH/day forcing files to pywatershed, NetCDF format\n", + "# cbh_nc_dir = domain_dir\n", + "# cbh_files = [\n", + "# domain_dir / \"prcp_2yr.cbh\",\n", + "# domain_dir / \"tmax_2yr.cbh\",\n", + "# domain_dir / \"tmin_2yr.cbh\",\n", + "# ]\n", + "\n", + "# params = pws.parameters.PrmsParameters.load(domain_dir / \"myparam.param\")\n", + "\n", + "# for cbh_file in cbh_files:\n", + "# out_file = cbh_nc_dir / cbh_file.with_suffix(\".nc\").name\n", + "# pws.utils.cbh_file_to_netcdf(cbh_file, params, out_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9288d4c9-a1f0-423f-b580-e43230651d0d", + "metadata": {}, + "outputs": [], + "source": [ + "# We'll output the to-channel fluxes for use later when running FlowGraph as a post-process.\n", + "run_dir = nb_output_dir / \"fgr_nhm\"\n", + "\n", + "control.options = control.options | {\n", + " \"input_dir\": domain_dir,\n", + " \"budget_type\": \"error\",\n", + " \"calc_method\": \"numba\",\n", + " \"netcdf_output_dir\": run_dir,\n", + " \"netcdf_output_var_names\": [\n", + " \"seg_outflow\",\n", + " \"sroff_vol\",\n", + " \"ssres_flow_vol\",\n", + " \"gwres_flow_vol\",\n", + " ],\n", + "}\n", + "\n", + "nhm_processes = [\n", + " pws.PRMSSolarGeometry,\n", + " pws.PRMSAtmosphere,\n", + " pws.PRMSCanopy,\n", + " pws.PRMSSnow,\n", + " pws.PRMSRunoff,\n", + " pws.PRMSSoilzone,\n", + " pws.PRMSGroundwater,\n", + " pws.PRMSChannel,\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e96b9ba-1253-4a41-abfb-3444de3b37c0", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "if not run_dir.exists():\n", + " # must delete the run dir to re-run\n", + " run_dir.mkdir()\n", + " nhm = pws.Model(\n", + " nhm_processes,\n", + " control=control,\n", + " parameters=params,\n", + " )\n", + " nhm.run(finalize=True)\n", + " nhm.finalize()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdcfb7bc-5ebe-4fb8-a4f6-91a8370f3fbf", + "metadata": {}, + "outputs": [], + "source": [ + "outflow = xr.open_dataarray(run_dir / \"seg_outflow.nc\").sel(nhm_seg=44426)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bec64846-76af-4934-a304-f40f33b0cb14", + "metadata": {}, + "outputs": [], + "source": [ + "outflow.hvplot(width=plot_width, height=plot_height)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54058459-fed5-4cc3-bed2-8a3edfb22a13", + "metadata": {}, + "outputs": [], + "source": [ + "outflow_obs = (\n", + " xr.open_dataarray(run_dir / \"seg_outflow.nc\")\n", + " .sel(nhm_seg=44438)\n", + " .rename(\"modeled\")\n", + " .to_dataframe()[\"modeled\"]\n", + ")\n", + "obs_all = pyPRMS.Streamflow(domain_dir / \"sf_data\")\n", + "wh_poi_obs = np.where(params.parameters[\"poi_gage_segment\"] == 184)\n", + "gage_id = 
params.parameters[\"poi_gage_id\"][wh_poi_obs][0]\n", + "obs = obs_all.data[gage_id]\n", + "obs.rename(\"gage \" + obs.name, inplace=True)\n", + "\n", + "outflow_obs.hvplot() * obs[0 : (365 * 2)].hvplot()" + ] + }, + { + "cell_type": "markdown", + "id": "07caf286-e581-4cbd-83ad-e0b960e56911", + "metadata": {}, + "source": [ + "## FlowGraph in Model: NHM with a STARFIT representation of Big Sandy\n", + "\n", + "Because FlowGraph is not part of PRMS, we cant run FlowGraph with PRMS/NHM using the legacy instantiation (eg. notebook 02). We have to use a multi-process model, the pywatershed way (e.g. notebook 01). The next three cells build the multi-process model above the FlowGraph. We then use a helper function to insert the STARFIT resevoir into the PRMS/NHM Muskingum-Mann channel routing and append it to our multi-process model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9328c9fd-8afd-4bd5-8326-ed093d3610d0", + "metadata": {}, + "outputs": [], + "source": [ + "params_file_channel = domain_dir / \"parameters_PRMSChannel.nc\"\n", + "params_channel = pws.parameters.PrmsParameters.from_netcdf(params_file_channel)\n", + "\n", + "dis_file = domain_dir / \"parameters_dis_hru.nc\"\n", + "dis_hru = pws.Parameters.from_netcdf(dis_file, encoding=False)\n", + "\n", + "dis_both_file = domain_dir / \"parameters_dis_both.nc\"\n", + "dis_both = pws.Parameters.from_netcdf(dis_both_file, encoding=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "810d62fe-2742-48df-ae87-21ba419dd1a2", + "metadata": {}, + "outputs": [], + "source": [ + "control = pws.Control.load_prms(control_file, warn_unused_options=False)\n", + "control.edit_n_time_steps(ndays_run)\n", + "run_dir = nb_output_dir / \"fgr_starfit\"\n", + "control.options = control.options | {\n", + " \"input_dir\": domain_dir,\n", + " \"budget_type\": \"error\",\n", + " \"calc_method\": \"numba\",\n", + " \"netcdf_output_dir\": run_dir,\n", + " \"netcdf_output_var_names\": [\n", + " \"node_outflows\",\n", + " \"node_upstream_inflows\",\n", + " \"node_storages\",\n", + " ],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3eb9546-c026-4aa8-b9ea-9d37d7ece0e9", + "metadata": {}, + "outputs": [], + "source": [ + "nhm_processes = [\n", + " pws.PRMSSolarGeometry,\n", + " pws.PRMSAtmosphere,\n", + " pws.PRMSCanopy,\n", + " pws.PRMSSnow,\n", + " pws.PRMSRunoff,\n", + " pws.PRMSSoilzone,\n", + " pws.PRMSGroundwater, # stop here, we'll add PRMSChannel as part of FlowGraph later\n", + "]\n", + "\n", + "model_dict = {\n", + " \"control\": control,\n", + " \"dis_both\": dis_hru,\n", + " \"dis_hru\": dis_both,\n", + " \"model_order\": [],\n", + "}\n", + "\n", + "# As in notebook 01\n", + "for proc in nhm_processes:\n", + " proc_name = proc.__name__\n", + " proc_rename = \"prms_\" + proc_name[4:].lower()\n", + " model_dict[\"model_order\"] += [proc_rename]\n", + " model_dict[proc_rename] = {}\n", + " proc_dict = model_dict[proc_rename]\n", + " proc_dict[\"class\"] = proc\n", + " proc_param_file = domain_dir / f\"parameters_{proc_name}.nc\"\n", + " proc_dict[\"parameters\"] = pws.Parameters.from_netcdf(proc_param_file)\n", + " if proc_rename == \"prms_channel\":\n", + " proc_dict[\"dis\"] = \"dis_both\"\n", + " else:\n", + " proc_dict[\"dis\"] = \"dis_hru\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b170049-ec6b-4a6d-b183-98effa92d6c0", + "metadata": {}, + "outputs": [], + "source": [ + "# what did that give us?\n", + "pprint(model_dict, 
sort_dicts=False)" + ] + }, + { + "cell_type": "markdown", + "id": "f229c613-56fe-49c0-b211-f0f047eddc57", + "metadata": {}, + "source": [ + "Now we have a model dictionary describing everything above the `PRMSChannel` (Musking-Mann). We have a very nice helper function, `prms_channel_flow_graph_to_model_dict`, we can use to add a `FlowGraph` to this model. The function takes the existing `model_dict`, the `PRMSChannel` information, plus additional user-supplied information, to construct a `FlowGraph` with a new `StarfitFlowNode` inserted in the `PRMSChannel` at the location above nhm segment 44426 (and below 44434 and 44435) to represent Big Sandy. This `FlowGraph` instance is added to the `model_dict` by name \"prms_channel_flow_graph\". \n", + "\n", + "The function will also add an `InflowExchange` instance to the `model_dict` named \"inflow_exchange\" which will manage getting the fluxes from PRMS to the FlowGraph. Zero lateral flows are supplied to the StarfitNode for Big Sandy in this case (though we could do otherwise)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db25a89a-7a37-405e-ad09-1bfe682cabd5", + "metadata": {}, + "outputs": [], + "source": [ + "model_dict = pws.prms_channel_flow_graph_to_model_dict(\n", + " model_dict=model_dict,\n", + " prms_channel_dis=dis_both,\n", + " prms_channel_dis_name=\"dis_both\",\n", + " prms_channel_params=params_channel,\n", + " new_nodes_maker_dict={\n", + " \"starfit\": pws.hydrology.starfit.StarfitFlowNodeMaker(\n", + " None,\n", + " sf_params,\n", + " budget_type=\"error\",\n", + " compute_daily=True,\n", + " )\n", + " },\n", + " new_nodes_maker_names=[\"starfit\"],\n", + " new_nodes_maker_indices=[0],\n", + " new_nodes_flow_to_nhm_seg=[44426],\n", + " graph_budget_type=\"warn\", # move to error\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0a2e182-0475-42cf-be92-2d36154cad7e", + "metadata": {}, + "outputs": [], + "source": [ + "# The \"inflow_exchange\" and \"prms_channel_flow_graph\" have been added to the model\n", + "pprint(model_dict, sort_dicts=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ece847a1-c229-4813-9dee-eb3be5d1cc80", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "if not run_dir.exists():\n", + " run_dir.mkdir()\n", + " model = pws.Model(model_dict)\n", + " model.run()\n", + " model.finalize()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b8e0ec9-340c-4668-b483-9df90f8f5936", + "metadata": {}, + "outputs": [], + "source": [ + "if \"model\" in locals().keys():\n", + " model.processes[\"prms_channel_flow_graph\"].budget" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdc97a64-807c-4fa3-a3a3-d4d404fd25ca", + "metadata": {}, + "outputs": [], + "source": [ + "wh_44426 = np.where(params.parameters[\"nhm_seg\"] == 44426)[0]\n", + "outflow_nodes = xr.open_dataarray(run_dir / \"node_outflows.nc\")[\n", + " :, wh_44426\n", + "].drop_vars(\"node_coord\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7af336c0-e846-4fd9-a2d0-6728c4c42d78", + "metadata": {}, + "outputs": [], + "source": [ + "xr.merge([outflow, outflow_nodes]).rename(\n", + " {\"seg_outflow\": \"NHM\", \"node_outflows\": \"STARFIT\"}\n", + ").hvplot(\n", + " width=plot_width,\n", + " height=plot_height,\n", + " ylabel=\"streamflow (cfs)\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "514e2590-a439-4ee5-90d2-625fa06f9934", + "metadata": {}, + 
"outputs": [], + "source": [ + "storage_nodes = xr.open_dataarray(run_dir / \"node_storages.nc\")[\n", + " :, -1\n", + "].drop_vars(\"node_coord\")\n", + "storage_nodes.hvplot(width=plot_width, height=plot_height)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f31416c-f0ed-4adc-81bc-8e968f927b8a", + "metadata": {}, + "outputs": [], + "source": [ + "xr.merge([outflow, outflow_nodes, storage_nodes]).rename(\n", + " {\n", + " \"seg_outflow\": \"NHM\",\n", + " \"node_outflows\": \"Big Sandy Outflow\",\n", + " \"node_storages\": \"Big Sandy Storage\",\n", + " }\n", + ").hvplot(\n", + " width=plot_width,\n", + " height=plot_height,\n", + " ylabel=\"streamflow (cfs)\\nstorage (million cubic feet)\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "14432433-2385-4c21-992a-a05fc9524f11", + "metadata": {}, + "source": [ + "## FlowGraph as a post-process: Drive FlowGraph with STARFIT representation of Big Sandy and Pass-Through using NHM output files\n", + "Above we ran the full NHM with a `StarfitNode` at Big Sandy. But pywatershed is flexible and in the NHM configuration no two process representations are two-way coupled. See [figure in the extended release notes](https://ec-usgs.github.io/pywatershed/assets/img/pywatershed_NHM_model_graph.png). (Note that some PRMS configurations in pywatershed can be two-way coupled between Runoff and Soilzone and/or Canopy and Snow.) In this case, the `PRMSChannel` is one-way coupled (forced) buy the rest of the model. So we could use the output of the first, NHM run above without any reservoir representation and use its outupts to drive just the `FlowGraph` in the run above. We might call running `FlowGraph` in this way a \"post-process\". If one were running the no-reservoir model and looking at hypotheses of what FlowGraphs give better flow representations, this is the method you'd want to follow.\n", + "\n", + "So for this case we have a different helper function, `prms_channel_flow_graph_postprocess`, to which we supply most of the same information about the `FlowGraph`. However, we tell it about where it can find inputs from file rather than about an existing `model_dict` (as above).\n", + "\n", + "For additional extra fun and illustration, we'll not only add the `StarfitNode` for Big Sandy, we'll demonstrate that we can add additional nodes to the `FlowGraph` by putting a random `PassThroughNode` elsewhere on the domain. This node has no effect on the flows by design, but adding it here shows how additional nodes can easily be added to a `FlowGraph`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56aaced3-9191-4acb-9043-24bcb2623e70", + "metadata": {}, + "outputs": [], + "source": [ + "control = pws.Control.load_prms(control_file, warn_unused_options=False)\n", + "control.edit_n_time_steps(ndays_run)\n", + "run_dir = nb_output_dir / \"fgr_starfit_post\"\n", + "control.options = control.options | {\n", + " \"input_dir\": domain_dir,\n", + " \"budget_type\": \"error\",\n", + " \"calc_method\": \"numba\",\n", + " \"netcdf_output_dir\": run_dir,\n", + " \"netcdf_output_var_names\": [\n", + " \"node_outflows\",\n", + " \"node_upstream_inflows\",\n", + " \"node_storages\",\n", + " ],\n", + "}\n", + "\n", + "params_file_channel = domain_dir / \"parameters_PRMSChannel.nc\"\n", + "params_channel = pws.parameters.PrmsParameters.from_netcdf(params_file_channel)\n", + "\n", + "if \"dis_hru\" in locals().keys():\n", + " del dis_hru\n", + "\n", + "dis_both_file = domain_dir / \"parameters_dis_both.nc\"\n", + "dis_both = pws.Parameters.from_netcdf(dis_both_file, encoding=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8c52566-9032-49ed-8012-382c9f5cd5dd", + "metadata": {}, + "outputs": [], + "source": [ + "sfp_ds = sf_params.to_xr_ds().copy()\n", + "cap_mult = 1.5\n", + "sfp_ds[\"GRanD_CAP_MCM\"] *= cap_mult\n", + "sf_params_new = pws.Parameters.from_ds(sfp_ds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "134c8589-f15d-4174-b2c8-ffd0b7214b9f", + "metadata": {}, + "outputs": [], + "source": [ + "input_dir = nb_output_dir / \"fgr_nhm\" # use the output of the NHM run\n", + "\n", + "flow_graph = pws.prms_channel_flow_graph_postprocess(\n", + " control=control,\n", + " input_dir=input_dir,\n", + " prms_channel_dis=dis_both,\n", + " prms_channel_params=params_channel,\n", + " new_nodes_maker_dict={\n", + " \"starfit\": pws.hydrology.starfit.StarfitFlowNodeMaker(\n", + " None,\n", + " sf_params_new,\n", + " compute_daily=False,\n", + " budget_type=\"error\",\n", + " ),\n", + " \"pass_through\": pws.hydrology.pass_through_node.PassThroughNodeMaker(),\n", + " },\n", + " new_nodes_maker_names=[\"starfit\", \"pass_through\"],\n", + " new_nodes_maker_indices=[0, 0], # relative to the individual NodeMakers\n", + " new_nodes_flow_to_nhm_seg=[\n", + " 44426,\n", + " 44435,\n", + " ], # the second is a pass through above the first\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f5619ab-f399-4303-a6f5-4e656971424b", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "if not run_dir.exists():\n", + " run_dir.mkdir()\n", + " flow_graph.initialize_netcdf()\n", + " for istep in tqdm(range(control.n_times)):\n", + " control.advance()\n", + " flow_graph.advance()\n", + " flow_graph.calculate(1.0)\n", + " flow_graph.output()\n", + "\n", + " flow_graph.finalize()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f52c3100-3db4-489e-8d29-b755b0807cb6", + "metadata": {}, + "outputs": [], + "source": [ + "wh_44426 = np.where(params.parameters[\"nhm_seg\"] == 44426)[0]\n", + "outflow_nodes_post = (\n", + " xr.open_dataarray(run_dir / \"node_outflows.nc\")[:, wh_44426]\n", + " .drop_vars(\"node_coord\")\n", + " .rename(\"node_outflows_post\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7dd9e92e-1e96-478c-a381-d5b2fd0d45db", + "metadata": {}, + "outputs": [], + "source": [ + "xr.merge(\n", + " [\n", + " outflow,\n", + " outflow_nodes,\n", + " outflow_nodes_post,\n", + " ]\n", + 
").rename(\n", + " {\n", + " \"seg_outflow\": \"NHM\",\n", + " \"node_outflows\": \"STARFIT\",\n", + " \"node_outflows_post\": f\"STARFIT CAP*{cap_mult}\",\n", + " }\n", + ").hvplot(width=plot_width, height=plot_height, ylabel=\"streamflow (cfs)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c89c762a-77e4-44d1-a75b-560ea9f6ceda", + "metadata": {}, + "outputs": [], + "source": [ + "storage_nodes_post = (\n", + " xr.open_dataarray(run_dir / \"node_storages.nc\")[\n", + " :, -2\n", + " ] # pass through is the last node this time\n", + " .drop_vars(\"node_coord\")\n", + " .rename(\"node_storages_post\")\n", + ")\n", + "xr.merge(\n", + " [\n", + " storage_nodes,\n", + " storage_nodes_post,\n", + " ]\n", + ").hvplot(\n", + " width=plot_width,\n", + " height=plot_height,\n", + " ylabel=\"storage (million cubic feet)\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38758377-ee47-4c77-8bb7-721e7581b5ac", + "metadata": {}, + "outputs": [], + "source": [ + "xr.merge(\n", + " [\n", + " outflow,\n", + " outflow_nodes,\n", + " storage_nodes,\n", + " outflow_nodes_post,\n", + " storage_nodes_post,\n", + " ]\n", + ").rename(\n", + " {\n", + " \"seg_outflow\": \"NHM\",\n", + " \"node_outflows\": \"Big Sandy Outflow\",\n", + " \"node_storages\": \"Big Sandy Storage\",\n", + " \"node_outflows_post\": f\"Big Sandy Outflow CAP*{cap_mult}\",\n", + " \"node_storages_post\": f\"Big Sandy Storage CAP*{cap_mult}\",\n", + " }\n", + ").hvplot(\n", + " width=plot_width,\n", + " height=plot_height,\n", + " ylabel=\"streamflow (cfs)\\nstorage (million cubic feet)\",\n", + ")" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython" + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}