Welcome
diff --git a/develop/install-docker.html b/develop/install-docker.html
index 70256e50d..c85232115 100644
--- a/develop/install-docker.html
+++ b/develop/install-docker.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/install-native.html b/develop/install-native.html
index 3d1c961c4..071fe0644 100644
--- a/develop/install-native.html
+++ b/develop/install-native.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/install-vm.html b/develop/install-vm.html
index 200447205..d5d75af49 100644
--- a/develop/install-vm.html
+++ b/develop/install-vm.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/installation-details.html b/develop/installation-details.html
index 4540491c5..17f63947c 100644
--- a/develop/installation-details.html
+++ b/develop/installation-details.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/installing-dependencies-let-the-machines-do-it.html b/develop/installing-dependencies-let-the-machines-do-it.html
index ccdb01e86..b4d67145d 100644
--- a/develop/installing-dependencies-let-the-machines-do-it.html
+++ b/develop/installing-dependencies-let-the-machines-do-it.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/intermediate-user.html b/develop/intermediate-user.html
index 9d0d67106..ef7e735bd 100644
--- a/develop/intermediate-user.html
+++ b/develop/intermediate-user.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/introduction-4.html b/develop/introduction-4.html
index 93e12316b..b14e5b949 100644
--- a/develop/introduction-4.html
+++ b/develop/introduction-4.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/list-of-expectations.html b/develop/list-of-expectations.html
index 88c67453c..3a2b41d8c 100644
--- a/develop/list-of-expectations.html
+++ b/develop/list-of-expectations.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/load-settings.html b/develop/load-settings.html
index add0507f2..68d631dc6 100644
--- a/develop/load-settings.html
+++ b/develop/load-settings.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/loading-code-dont-but-use-requirenamespace-when-you-do.html b/develop/loading-code-dont-but-use-requirenamespace-when-you-do.html
index e12dfb2eb..8f9a3b909 100644
--- a/develop/loading-code-dont-but-use-requirenamespace-when-you-do.html
+++ b/develop/loading-code-dont-but-use-requirenamespace-when-you-do.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/maintain-vm.html b/develop/maintain-vm.html
index bb804d21d..26260cae1 100644
--- a/develop/maintain-vm.html
+++ b/develop/maintain-vm.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/met-drivers.html b/develop/met-drivers.html
index 382f5fb2c..375aa3296 100644
--- a/develop/met-drivers.html
+++ b/develop/met-drivers.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/meta-analysis.html b/develop/meta-analysis.html
index 2a6227fbb..26e6e3cf3 100644
--- a/develop/meta-analysis.html
+++ b/develop/meta-analysis.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/miscellaneous.html b/develop/miscellaneous.html
index 9d3003263..6a2f91b1d 100644
--- a/develop/miscellaneous.html
+++ b/develop/miscellaneous.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/model-docker.html b/develop/model-docker.html
index 832f65a29..c9dee2f0a 100644
--- a/develop/model-docker.html
+++ b/develop/model-docker.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-biocro.html b/develop/models-biocro.html
index 5f98bbbcf..e2b86a719 100644
--- a/develop/models-biocro.html
+++ b/develop/models-biocro.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-clm.html b/develop/models-clm.html
index 994018d28..675ef1100 100644
--- a/develop/models-clm.html
+++ b/develop/models-clm.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-dalec.html b/develop/models-dalec.html
index 61b16db87..70e4258f7 100644
--- a/develop/models-dalec.html
+++ b/develop/models-dalec.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-ed.html b/develop/models-ed.html
index d9ae86bb4..f1e132145 100644
--- a/develop/models-ed.html
+++ b/develop/models-ed.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-gday.html b/develop/models-gday.html
index 889720a80..2fe00e1dd 100644
--- a/develop/models-gday.html
+++ b/develop/models-gday.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-ldndc.html b/develop/models-ldndc.html
index 7bb79fb44..089174c2c 100644
--- a/develop/models-ldndc.html
+++ b/develop/models-ldndc.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-linkages.html b/develop/models-linkages.html
index e310da43c..5d47436e5 100644
--- a/develop/models-linkages.html
+++ b/develop/models-linkages.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-lpjguess.html b/develop/models-lpjguess.html
index c8695a9e0..135be1ec3 100644
--- a/develop/models-lpjguess.html
+++ b/develop/models-lpjguess.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-maespa.html b/develop/models-maespa.html
index 0142898ce..ada917ab0 100644
--- a/develop/models-maespa.html
+++ b/develop/models-maespa.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-preles.html b/develop/models-preles.html
index b2232616d..79e97662a 100644
--- a/develop/models-preles.html
+++ b/develop/models-preles.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-sipnet.html b/develop/models-sipnet.html
index b8fb2296a..73601fc6b 100644
--- a/develop/models-sipnet.html
+++ b/develop/models-sipnet.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models-stics.html b/develop/models-stics.html
index c0aad3424..17b7fa6a1 100644
--- a/develop/models-stics.html
+++ b/develop/models-stics.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/models_singularity.html b/develop/models_singularity.html
index 5a45f8989..18d66b4d1 100644
--- a/develop/models_singularity.html
+++ b/develop/models_singularity.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/narr.html b/develop/narr.html
index 1c44fc1b6..2f0a52f72 100644
--- a/develop/narr.html
+++ b/develop/narr.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/network-status-map.html b/develop/network-status-map.html
index c488ee7e0..75b0d11f6 100644
--- a/develop/network-status-map.html
+++ b/develop/network-status-map.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/nldas.html b/develop/nldas.html
index 23527da01..64761d4ce 100644
--- a/develop/nldas.html
+++ b/develop/nldas.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/osinstall.html b/develop/osinstall.html
index c0a4fc9b8..969f36d30 100644
--- a/develop/osinstall.html
+++ b/develop/osinstall.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/othervignettes.html b/develop/othervignettes.html
index e9ef5ef72..96b14eee3 100644
--- a/develop/othervignettes.html
+++ b/develop/othervignettes.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/overview-1.html b/develop/overview-1.html
index cfe9dd030..b97abbdd7 100644
--- a/develop/overview-1.html
+++ b/develop/overview-1.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/package-dependencies.html b/develop/package-dependencies.html
index 4c954361d..88ecb3d26 100644
--- a/develop/package-dependencies.html
+++ b/develop/package-dependencies.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/paleon.html b/develop/paleon.html
index f9d360671..481db6720 100644
--- a/develop/paleon.html
+++ b/develop/paleon.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-api.html b/develop/pecan-api.html
index 6df4e1c10..a39dd4c73 100644
--- a/develop/pecan-api.html
+++ b/develop/pecan-api.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-docker.html b/develop/pecan-docker.html
index 973b78ce4..e5dd9cb8a 100644
--- a/develop/pecan-docker.html
+++ b/develop/pecan-docker.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-git.html b/develop/pecan-git.html
index b3404dc5f..c071aaef7 100644
--- a/develop/pecan-git.html
+++ b/develop/pecan-git.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-in-a-nutshell.html b/develop/pecan-in-a-nutshell.html
index 674eb4040..1fef8c421 100644
--- a/develop/pecan-in-a-nutshell.html
+++ b/develop/pecan-in-a-nutshell.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-manual-setup.html b/develop/pecan-manual-setup.html
index b79002ed7..466e4bb42 100644
--- a/develop/pecan-manual-setup.html
+++ b/develop/pecan-manual-setup.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-models.html b/develop/pecan-models.html
index fad38afba..facb29490 100644
--- a/develop/pecan-models.html
+++ b/develop/pecan-models.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-project-api.html b/develop/pecan-project-api.html
index cf80c6fe2..696396efd 100644
--- a/develop/pecan-project-api.html
+++ b/develop/pecan-project-api.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-project-used-in-courses.html b/develop/pecan-project-used-in-courses.html
index 061bcb6c6..1ab88ddc9 100644
--- a/develop/pecan-project-used-in-courses.html
+++ b/develop/pecan-project-used-in-courses.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-remote.html b/develop/pecan-remote.html
index 572f4b728..a3fa355c4 100644
--- a/develop/pecan-remote.html
+++ b/develop/pecan-remote.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecan-standards.html b/develop/pecan-standards.html
index 11d2150e9..c81c17957 100644
--- a/develop/pecan-standards.html
+++ b/develop/pecan-standards.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecanXML.html b/develop/pecanXML.html
index 1c68e6323..efce0c34c 100644
--- a/develop/pecanXML.html
+++ b/develop/pecanXML.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/pecanvm.html b/develop/pecanvm.html
index 78c4149b3..8ee867ad3 100644
--- a/develop/pecanvm.html
+++ b/develop/pecanvm.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/project-overview.html b/develop/project-overview.html
index 07244aae6..8e7c65bf8 100644
--- a/develop/project-overview.html
+++ b/develop/project-overview.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/query-database.html b/develop/query-database.html
index 2711e99f4..29e527532 100644
--- a/develop/query-database.html
+++ b/develop/query-database.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/r-universe.html b/develop/r-universe.html
index e0521d5e4..46082ba19 100644
--- a/develop/r-universe.html
+++ b/develop/r-universe.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/rabbitmq.html b/develop/rabbitmq.html
index de07ea158..498660374 100644
--- a/develop/rabbitmq.html
+++ b/develop/rabbitmq.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/remote-data-module.html b/develop/remote-data-module.html
index 6d80ed33f..b5e01a391 100644
--- a/develop/remote-data-module.html
+++ b/develop/remote-data-module.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/remote-model-execution-with-pecan.html b/develop/remote-model-execution-with-pecan.html
index 508a6f3d7..4450c2e50 100644
--- a/develop/remote-model-execution-with-pecan.html
+++ b/develop/remote-model-execution-with-pecan.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/restful-api-endpoints.html b/develop/restful-api-endpoints.html
index 7da39db4a..a4b82c1ae 100644
--- a/develop/restful-api-endpoints.html
+++ b/develop/restful-api-endpoints.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/restore-of-bety-database.html b/develop/restore-of-bety-database.html
index 58c4d3a92..573b2a0a7 100644
--- a/develop/restore-of-bety-database.html
+++ b/develop/restore-of-bety-database.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/running-pecan-code-for-remotely.html b/develop/running-pecan-code-for-remotely.html
index c38831cbf..3e882ba37 100644
--- a/develop/running-pecan-code-for-remotely.html
+++ b/develop/running-pecan-code-for-remotely.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/search_index.json b/develop/search_index.json
index 1d92a987b..a31cbe78b 100644
--- a/develop/search_index.json
+++ b/develop/search_index.json
@@ -1 +1 @@
-[["index.html", "The Predictive Ecosystem Analyzer Welcome", " The Predictive Ecosystem Analyzer By: PEcAn Team 2024-03-02 Welcome Ecosystem science, policy, and management informed by the best available data and models Our Mission: Develop and promote accessible tools for reproducible ecosystem modeling and forecasting PEcAn Website Public Chat Room Github Repository .page-inner{max-width:80% !important;} "],["project-overview.html", "1 Project Overview", " 1 Project Overview The Predictive Ecosystem Analyzer (PEcAn) is an integrated informatics toolbox for ecosystem modeling (Dietze et al. 2013, LeBauer et al. 2013). PEcAn consists of: An application program interface (API) that encapsulates an ecosystem model, providing a common interface, inputs, and output. Core utilities for handling and tracking model runs and the flows of information and uncertainties into and out of models and analyses An accessible web-based user interface and visualization tools An extensible collection of modules to handle specific types of analyses (sensitivity, uncertainty, ensemble), model-data syntheses (benchmarking, parameter data assimilation, state data assimilation), and data processing (model inputs and data constraints) This project is motivated by the fact that many of the most pressing questions about global change are limited by our ability to synthesize existing data and strategically prioritize the collection of new data. This project seeks to improve this ability by developing a framework for integrating multiple data sources in a sensible manner. The workflow system allows ecosystem modeling to be more reproducible, automated, and transparent in terms of operations applied to data, and thus ultimately more comprehensible to both peers and the public. It reduces the redundancy of effort among modeling groups, facilitate collaboration, and make models more accessible the rest of the research community. PEcAn is not itself an ecosystem model, and it can be used to with a variety of different ecosystem models; integrating a model involves writing a wrapper to convert inputs and outputs to and from the standards used by PEcAn. Currently, PEcAn supports multiple models listed PEcAn Models. Acknowledgements The PEcAn project is supported financially by the following: National Science Foundation (NSF): 1062547 1062204 1241894 1261582 1318164 1346748 1458021 1638577 1655095 1702996 National Aeronautics and Space Administration (NASA) NNX14AH65G NNX16AO13H 80NSSC17K0711 Advanced Research Projects Agency-Energy (ARPA-E) DE-AR0000594 Department of Defense, Strategic Environmental Research and Development Program (DOD-SERDP), grant RC2636 Energy Biosciences Institute, University of Illinois Amazon Web Services (AWS) Google Summer of Code BETYdb is a product of the Energy Biosciences Institute at the University of Illinois at Urbana-Champaign. We gratefully acknowledge the great effort of other researchers who generously made their own data available for further study. PEcAn is a collaboration among research groups at the Department of Earth And Environment at Boston University, the Energy Biosciences Institute at the University of Illinois, the Image Spatial Data Analysis group at NCSA, the Department of Atmospheric & Oceanic Sciences at the University Wisconsin-Madison, the Terrestrial Ecosystem Science & Technology (TEST) Group at Brookhaven National Laboratory, and the Joint Global Change Research Institute (JGCRI) at the Pacific Northwest National Laboratory. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the NSF, NASA, Boston University, University of Illinois, Brookhaven National Lab, Pacific National Lab, Battelle, the US Department of Defense, or the US Department of Energy. PEcAn Publications Fer I, R Kelly, P Moorcroft, AD Richardson, E Cowdery, MC Dietze. 2018. Linking big models to big data: efficient ecosystem model calibration through Bayesian model emulation. Biogeosciences Discussions doi:10.5194/bg-15-5801-2018 Feng X, Uriarte M, González G, et al. Improving predictions of tropical forest response to climate change through integration of field studies and ecosystem modeling. Glob Change Biol. 2018;24:e213–e232.doi:10.1111/gcb.13863 Dietze, M. C. (2017), Prediction in ecology: a first-principles framework. Ecol Appl, 27: 2048-2060. doi:10.1002/eap.1589 Fisher RA, Koven CD, Anderegg WRL, et al. 2017. Vegetation demographics in Earth System Models: A review of progress and priorities. Glob Change Biol. doi:10.1111/gcb.13910 Rollinson, C. R., Liu, Y., Raiho, A., Moore, D. J.P., McLachlan, J., Bishop, D. A., Dye, A., Matthes, J. H., Hessl, A., Hickler, T., Pederson, N., Poulter, B., Quaife, T., Schaefer, K., Steinkamp, J. and Dietze, M. C. (2017), Emergent climate and CO2 sensitivities of net primary productivity in ecosystem models do not agree with empirical data in temperate forests of eastern North America. Glob Change Biol. Accepted Author Manuscript. doi:10.1111/gcb.13626 LeBauer, D., Kooper, R., Mulrooney, P., Rohde, S., Wang, D., Long, S. P. and Dietze, M. C. (2017), betydb: a yield, trait, and ecosystem service database applied to second-generation bioenergy feedstock production. GCB Bioenergy. doi:10.1111/gcbb.12420 Rogers A, BE Medlyn, J Dukes, G Bonan, S von Caemmerer, MC Dietze, J Kattge, ADB Leakey, LM Mercado, U Niinemets, IC Prentice, SP Serbin, S Sitch, DA Way, S Zaehle. 2017. “A Roadmap for Improving the Representation of Photosynthesis in Earth System Models” New Phytologist 213(1):22-42 doi:10.1111/nph.14283 Shiklomanov. A, MC Dietze, T Viskari, PA Townsend, SP Serbin. 2016 “Quantifying the influences of spectral resolution on uncertainty in leaf trait estimates through a Bayesian approach to RTM inversion” Remote Sensing of the Environment 183: 226-238. doi:10.1016/j.rse.2016.05.023 Viskari et al. 2015 Model-data assimilation of multiple phenological observations to constrain and forecast leaf area index. Ecological Applications 25(2): 546-558. doi:10.1890/14-0497.1 Dietze, M. C., S. P. Serbin, C. Davidson, A. R. Desai, X. Feng, R. Kelly, R. Kooper, D. LeBauer, J. Mantooth, K. McHenry, and D. Wang (2014) A quantitative assessment of a terrestrial biosphere model’s data needs across North American biomes. Journal of Geophysical Research-Biogeosciences doi:10.1002/2013jg002392 LeBauer, D.S., D. Wang, K. Richter, C. Davidson, & M.C. Dietze. (2013). Facilitating feedbacks between field measurements and ecosystem models. Ecological Monographs. doi:10.1890/12-0137.1 Wang, D, D.S. LeBauer, and M.C. Dietze(2013) Predicting yields of short-rotation hybrid poplar (Populus spp.) for the contiguous US through model-data synthesis. Ecological Applications doi:10.1890/12-0854.1 Dietze, M.C., D.S LeBauer, R. Kooper (2013) On improving the communication between models and data. Plant, Cell, & Environment doi:10.1111/pce.12043 PEcAn Project Google Scholar page Longer / auto-updated list of publications that mention PEcAn’s full name in Google Scholar "],["contributor-covenant-code-of-conduct.html", "2 Contributor Covenant Code of Conduct", " 2 Contributor Covenant Code of Conduct Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. Our Standards Examples of behavior that contributes to creating a positive environment include: Using welcoming and inclusive language Being respectful of differing viewpoints and experiences Gracefully accepting constructive criticism Focusing on what is best for the community Showing empathy towards other community members Examples of unacceptable behavior by participants include: The use of sexualized language or imagery and unwelcome sexual attention or advances Trolling, insulting/derogatory comments, and personal or political attacks Public or private harassment Publishing others’ private information, such as a physical or electronic address, without explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at pecanproj[at]gmail.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership. Attribution This Code of Conduct is adapted from the Contributor Covenant version 1.4, available at http://contributor-covenant.org/version/1/4. "],["about-the-pecan-book.html", "3 About the PEcAn Book", " 3 About the PEcAn Book This book serves as documentation for the PEcAn Project. It contains descriptions of topics necessary to inform a beginner and advanced user as well as requisite materials for developers. It does not contain low-level descriptions of functions within PEcAn. Our aim for this documentation is to educate you about the PEcAn software, the possibilities of its usage, and the standards,expectations, and core workflows for developers. This book is organized four main topics: Introduction - Brief explanation of PEcAn, how to obtain the PEcAn VM, and explanation of basic web interface functions. Tutorials/Demos/Workflows - All User and Developer tutorials/demos/workflows to explain how to use and add to PEcAn in different ways. Topical Pages - Explanation of main PEcAn components and how they fit together. Appendix - External documentation and sources of information and a FAQ section. "],["general-feedbackcommentssuggestions.html", "3.1 General Feedback/Comments/Suggestions", " 3.1 General Feedback/Comments/Suggestions We want your ideas, thoughts, comments, and suggestions! As a community we are committed to creating an inclusive and supportive atmosphere so we all to reach out to us in the following ways: Github: https://github.com/PecanProject/pecan This is the main hub of communication surrounding PEcAn development. Check out the issues section to see known bugs, upcoming features, and ideas for future development. Feel free to comment on existing issues or open new ones with questions, bug reports, feature requests, and/or ideas. Slack: https://pecanproject.slack.com/ Slack serves as our day to day mode of communication. To join us in slack you will need to create an account first. This is done in 3 steps: Request an inivitation to join Slack, this will be send by email to address you provided. Check your inbox for an email from Slack with subject “Rob Kooper has invited you to join a Slack workspace”. This email should have a link that you can click to join slack. When you click a webpage will open up that asks you to create an account, once that is done you can login into the slack chatrooms. Email: pecanproj[at]gmail.com If you do not wish your communication with the team to be public, send us an email at the address above and we will get back to you as soon as possible. "],["bookediting.html", "3.2 Editing this book", " 3.2 Editing this book The file organization of this documentation can be described simply as follows: Each chapter is in its own file (within the corresponding section). Each group of chapters (i.e. “part” in LaTeX) is in its own directory. Sections and chapters are rendered (and numbered) in alpha-numerical order of their corresponding file names. Therefore, each section directory and chapter file name should be prefixed with a two-digit (zero-padded) number. File and directory names should be as similar as possible to the name of the corresponding chapter or section. For instance, the file name for this chapter’s source file is 06_reference/10_editing_this_book.Rmd. This numbering means that if you need to create an additional chapter before an existing one, you will have to renumber all chapters following it. To ensure correct rendering, you should also make sure that each chapter starts with a level 1 heading (# heading). For instance, this chapter’s source starts with: # Editing this book {#bookediting} The file organization of this documentation can be described simply as follows: ... Furthermore, to keep the organization consistent, each chapter should have exactly one level 1 heading (i.e. do not combine multiple chapters into a single file). In other words, do not spread a single chapter across multiple files, and do not put multiple chapters in the same file. Each section directory has a file starting with 00 that contains only the section (or “Part”) title. This is used to create the greyed-out section headers in the rendered HTML. For instance, this section has a file called 00_introduction.Rmd which contains only the following: # (PART) Introduction {-} To cross-reference a different section, use that section’s unique tag (starts with #; appears next to the section heading surrounded in curly braces). For instance, the following Markdown contains two sections that cross-reference each other: ## Introduction {#intro} Here is the intro. This is a link to the [next section](#section-one). ## First section. {#section-one} As mentioned in the [previous section](#intro). If no header tag exists for a section you want to cross-reference, you should create one. We have no strict rules about this, but it’s useful to have tags that give some sense of their parent hierarchy and reference their parent sections (e.g. #models, #models-ed, and #models-ed-xml to refer to a chapter on models, with a subsection on ED and a sub-subsection on ED XML configuration). If section organization changes, it is fine to move header tags, but avoid altering existing tags as this will break all the links pointing to that tag. (Note that it is also possible to link to section headings by their exact title. However, this is not recommended because these section titles could change, which would break the links.) When referring to PEcAn packages or specific functions, it is a good idea to link to the rendered package documentation. For instance, here are links to the models/ed package, the PEcAn.ED2::modify_ed2in function, and the PEcAnRTM package vignette. If necessary, you can also link directly to specific lines or blocks in the source code on GitHub, like this. (To get a link to a line, click its line number. To then select a block, shift-click another line number.) To insert figures, use knitr::include_graphics(\"path/to/figure.png\") inside an R code chunk. For example: ```{r} knitr::include_graphics("04_advanced_user_guide/images/Input_ID_name.png") ``` Note that image file names are relative to the book_source directory, NOT to the markdown file. In other words, if myimage.png was in the same directory as this file, I would still have to reference it as 06_reference/myimage.png – I could not just do myimage.png. The size, caption, and other properties of the rendered image can be controlled via chunk options. For additional information about how bookdown works (including information about its syntax), see the Bookdown free online book. "],["how-to-create-your-own-version-of-documentation.html", "3.3 How to create your own version of Documentation", " 3.3 How to create your own version of Documentation To create your own version of documentation you’ll need to follow these steps: These procedures assume you have an github account, you forked pecan, you have cloned pecan locally, and have a TRAVIS account. 1. Create a repository under your github account with the name “pecan-documentation”. Clear it of any files. Set up the repository with Github Pages by going to the settings tab for that repository. 2. Create a personal access token for github: https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line and copy it. 3. Create a TRAVIS environment variable called GITHUB_PAT and save the access token you made as a secret variable. 4. Create a branch from your local pecan repository with a name that starts with release/. (ie. Release/vtonydoc) 5. Make whichever changes you would like to the documentation and push it up to your fork. From here TRAVIS will build your documentation. The web version of your documentation will be rendered with the url following the structure: username.github.io/pecan-documentation/pattern_after_release/ "],["pecan-manual-setup.html", "4 Install PEcAn", " 4 Install PEcAn These instructions are provided to document how to install and setup PEcAn. It includes: Virtual machine (Recommended for students and new users) PEcAn Docker (Recommended for development and production deployment) Local installation from r-universe (Recommended for development only) PEcAn OS specific installation The PEcAn code and necessary infrastructure can be obtained and compiled in different ways. This set of instructions will help facilitate your path and the steps necessary to move forward to have a fully a functioning PEcAn environment. "],["install-vm.html", "4.1 Virtual Machine (VM)", " 4.1 Virtual Machine (VM) The PEcAn virtual machine consists of all of PEcAn pre-compiled within a Linux operating system and saved in a “virtual machine” (VM). Virtual machines allow for running consistent set-ups without worrying about differences between operating systems, library dependencies, compiling the code, etc. Install VirtualBox This is the software that runs the virtual machine. You can find the download link and instructions at https://www.virtualbox.org. NOTE: On Windows you may see a warning about Logo testing, it is okay to ignore the warning. Download the PEcAn VM You can find the download link at http://opensource.ncsa.illinois.edu/projects/artifacts.php?key=PECAN, under the “Files” header. Click the “.ova” file to begin the download. Note that the file is ~7 GB, so this download can take several minutes to hours depending on your connection speed. Also, the VM requires >4 GB of RAM to operate correctly. Please check current usage of RAM and shutdown processes as needed. Import the VM Once the download is complete, open VirtualBox. In the VirtualBox menus, go to “File” → “Import Appliance” and locate the downloaded “.ova” file. For Virtualbox version 5.x: In the Appliance Import Settings, make sure you select “Reinitialize the MAC address of all network cards” (picture below). This is not selected by default and can result in networking issues since multiple machines might claim to have the same network MAC Address. For Virtualbox versions starting with 6.0, there is a slightly different interface (see figure). Select “Generate new MAC addresses for all network adapters” from the MAC Address Policy: NOTE: If you experience network connection difficulties in the VM with this enabled, try re-importing the VM without this setting selected). Finally, click “Import” to build the Virtual Machine from its image. Launch PEcAn Double click the icon for the PEcAn VM. A terminal window will pop up showing the machine booting up which may take a minute. It is done booting when you get to the pecan login: prompt. You do not need to login as the VM behaves like a server that we will be accessing through you web browser. Feel free to minimize the VM window. If you do want to login to the VM, the credentials are as follows: username: carya, password: illinois (after the pecan tree, [Carya illinoinensis][pecan-wikipedia]). Open the PEcAn web interface With the VM running in the background, open any web browser on the same machine and navigate to localhost:6480/pecan/ to start the PEcAn workflow. (NOTE: The trailing backslash may be necessary depending on your browser) Advanced interaction with the VM is mostly done through the command line. You can perform these manipulations from inside the VM window. However, the VM is also configured for SSH access (username carya, hostname localhost, port 6422). For instance, to open an interactive shell inside the VM from a terminal on the host machine, use a command like ssh -l carya -p 6422 localhost (when prompted, the password is illinois, as above). These steps should be enough to get you started with running models and performing basic analyses with PEcAn. For advanced details on VM configuration and maintenance, see the Advanced VM section. "],["install-docker.html", "4.2 Docker", " 4.2 Docker This is a short documentation on how to start with Docker and PEcAn. This will not go into much detail about about how to use Docker – for more details, see the main Docker topical page. Install Docker. Follow the instructions for your operating system at https://www.docker.com/community-edition#/download. Once Docker is installed, make sure it is running. To test that Docker is installed and running, open a terminal and run the following commands: docker run hello-world If successful, this should return a message starting with \"Hello from Docker!\". If this doesn’t work, there is something wrong with your configuration. You may need to open the Docker desktop app to complete the installation. Refer to the Docker documentation for debugging. NOTE: Depending on how Docker is installed and configured, you may have to run this command as sudo. Try running the command without sudo first. If that fails, but running as sudo succeeds, see these instructions for steps to use Docker as a non-root user. Install docker compose. If you are running this on a Mac or Windows this might be already installed. On Linux you will need to install this it separately; see https://docs.docker.com/compose/install/. To see if docker compose is successfully installed, use the following shell command: docker compose version This should print the current version of docker-compose. We have tested the instruction below with versions of docker compose 1.22 and above. Download the PEcAn docker compose file. It is located in the root directory of the PEcAn source code. For reference, here are direct links to the latest stable version and the bleeding edge development version. (To download the files, you should be able to right click the link and select “Save link as”.) Make sure the file is saved as docker-compose.yml in a directory called pecan. Initialize the PEcAn database and data images. The following docker compose commands are used to download all the data PEcAn needs to start working. For more on how they work, see our Docker topical pages. Create and start the PEcAn database container (without any data) docker compose up -d postgres If this is successful, the end of the output should look like the following: Creating pecan-postgres-1 ... done “Initialize” the data for the PEcAn database. docker run --rm --network pecan_pecan pecan/db This should produce a lot of output describing the database operations happening under the hood. Some of these will look like errors, but this is normal. This command succeeded if the output ends with the following (the syntax for creating a new user for accessing BetyDB): docker compose run bety user 'login' 'password' 'full name' 'email' 1 1 Add a user to BetyDB using the example syntax provided as the last line of the output of the previous step: # guest user docker compose run --rm bety user guestuser guestuser "Guest User" guestuser@example.com 4 4 # example user docker compose run --rm bety user carya illinois "Carya Demo User" carya@example.com 1 1 Download and configure the core PEcAn database files. docker run -ti --rm --network pecan_pecan --volume pecan_pecan:/data --env FQDN=docker pecan/data:develop This will produce a lot of output describing file operations. This command succeeded if the output ends with the following: ###################################################################### Done! ###################################################################### Download the pecan/docker/env.example & save it as .env file. Now, open the .env file & uncomment the following lines: COMPOSE_PROJECT_NAME=pecan PECAN_VERSION=develop Setting PECAN_VERSION=develop indicates that you want to run the bleeding-edge develop branch, meaning it may have bugs. To go ahead with the stable version you may set PECAN_VERSION=latest or PECAN_VERSION=<release-number> (For example 1.7.0). You can look at the list of all the releases of PEcAn to see what options are availble. Start the PEcAn stack. Assuming all of the steps above completed successfully, start the full stack by running the following shell command: docker compose up -d If all of the containers started successfully, you should be able to access the various components from a browser via the following URLs (if you run these commands on a remote machine replace localhost with the actual hostname). PEcAn documentation and home page – http://pecan.localhost PEcAn web interface (running models) – http://pecan.localhost/pecan/ (NOTE: The trailing slash is necessary.) BETY web interface – http://pecan.localhost/bety/ (Default username: carya, default password: illinois) RStudio interface – http://pecan.localhost/rstudio/ (Default username: carya, default password: illinois) Monitor, service that monitors models and shows all models that are online as well as how many instances are online and the number of jobs waiting. The output is in JSON – http://pecan.localhost/monitor/ To shut down the docker images run docker-compose stop. For troubleshooting and advanced configuration, see our Docker topical pages. "],["r-universe.html", "4.3 Installation From r-universe", " 4.3 Installation From r-universe PEcAn.all and all it’s dependencies can be installed from r-universe with the following code: # Enable repository from pecanproject options(repos = c( pecanproject = 'https://pecanproject.r-universe.dev', CRAN = 'https://cloud.r-project.org')) # Download and install PEcAn.all in R install.packages('PEcAn.all') If you are planning on working on PEcAn development with a local install, consider adding the above options() to a project-level .Rprofile as well. For example, by using usethis::edit_r_profile(\"project\"). This will allow devtools to install dependencies from r-universe, for example devtools::install_deps(\"models/ed\") should work when “https://pecanproject.r-universe.dev” is set as a repo. NOTE: functionality of a local install will be limited without a connection to a BETYdb instance. Instructions to install BETY. "],["install-native.html", "4.4 (Advanced) Native install", " 4.4 (Advanced) Native install The full PEcAn system has a lot of dependencies, including R packages, compiled C and Fortran libraries, and system services and utilities. Installing all of these side by side, and getting them to work harmoniously, is very time-consuming and challenging, which is why we strongly encourage new users to use the VM or Docker if possible. In a nutshell, the process for manual installation is as follows: Download the PEcAn source code from GitHub. The recommended way to do this is with the shell command git clone, i.e. git clone https://github.com/pecanproject/pecan. Download the BETY source code from GitHub. Install the PEcAn R packages and their dependencies. This can be done by running the shell command make inside the PEcAn source code directory. Note that many of the R packages on which PEcAn depends have system dependencies that are not included by default on most operating systems, but almost all of which should be available via your operating system’s package manager (e.g. Homebrew for MacOS, apt for Ubuntu/Debian/Mint Linux, yum for RedHat Fedora/CentOS). Install and configure PostgreSQL Install and configure the Apache web server. For more details, see our notes about OS Specific Installations. "],["user-section.html", "5 Tutorials ", " 5 Tutorials "],["pecan-in-a-nutshell.html", "5.1 How PEcAn Works in a nutshell", " 5.1 How PEcAn Works in a nutshell PEcAn provides an interface to a variety of ecosystem models and attempts to standardize and automate the processes of model parameterization, execution, and analysis. First, you choose an ecosystem model, then the time and location of interest (a site), the plant community (or crop) that you are interested in simulating, and a source of atmospheric data from the BETY database (LeBauer et al, 2010). These are set in a “settings” file, commonly named pecan.xml which can be edited manually if desired. From here, PEcAn will take over and set up and execute the selected model using your settings. The key is that PEcAn uses models as-is, and all of the translation steps are done within PEcAn so no modifications are required of the model itself. Once the model is finished it will allow you to create graphs with the results of the simulation as well as download the results. It is also possible to see all past experiments and simulations. There are two ways of using PEcAn, via the web interface and directly within R. Even for users familiar with R, using the web interface is a good place to start because it provides a high level overview of the PEcAn workflow. The quickest way to get started is to download the virtual machine or use an AWS instance. "],["demo-table.html", "5.2 PEcAn Demos", " 5.2 PEcAn Demos The following Tutorials assume you have installed PEcAn. If you have not, please consult the PEcAn Installation Section. Type Title Web Link Source Rmd Demo Basic Run html Rmd Demo Uncertainty Analysis html Rmd Demo Output Analysis html Rmd Demo MCMC html Rmd Demo Parameter Assimilation html Rmd Demo State Assimilation html Rmd Demo Sensitivity html Rmd Vignette Allometries html Rmd Vignette MCMC html Rmd Vignette Meteorological Data html Rmd Vignette Meta-Analysis html Rmd Vignette Photosynthetic Response Curves html Rmd Vignette Priors html Rmd Vignette Leaf Spectra:PROSPECT inversion html Rmd "],["demo-1.html", "5.3 Demo 01: Basic Run PEcAn", " 5.3 Demo 01: Basic Run PEcAn 5.3.0.1 Objective We will begin by exploring a set of web-based tools that are designed to run single-site model runs. A lot of the detail about what’s going on under the hood, and all the outputs that PEcAn produces, are left to Demo 2. This demo will also demonstrate how to use PEcAn outputs in additional analyses outside of PEcAn. 5.3.0.2 PEcAn URL In the following demo, URL is the web address of a PEcAn server and will refer to one of the following: If you are doing a live demo with the PEcAn team, URL was provided If you are running the PEcAn virtual machine: URL = localhost:6480 If you are running PEcAn using Amazon Web Services (AWS), URL is the Public IP If you are running PEcAn using Docker, URL is pecan.localhost If you followed instructions found in Install PEcAn by hand, URL is your server’s IP 5.3.0.3 Start PEcAn: Enter URL in your web browser Click the link for the PEcAn web interface Click the ‘Next’ button in the sidebar to move to the “Site Selection” page. 5.3.0.4 Site Selection 5.3.0.5 Host Select the local machine “pecan”. Other options exist if you’ve read and followed instructions found in Remote execution with PEcAn. 5.3.0.6 Model Select SIPNET (r136) from the available models because it is quick & simple. Reference material can be found in Models in PEcAn 5.3.0.7 Site Group To filter sites, you can select a specific group of sites. For this tutorial we will use Ameriflux. 5.3.0.8 Conversion: Select the conversion check box, to show all sites that PEcAn is capable of generating model drivers for automatically. By default (unchecked), PEcAn only displays sites where model drivers already exist in the system database 5.3.0.9 Site: For this tutorial, type US-NR1 in the search box to display the Niwot Ridge Ameriflux site (US-NR1), and then click on the pin icon. When you click on a site’s flag on the map, it will give you the name and location of the site and put that site in the “Site:” box on the left hand site, indicating your current selection. Once you are finished with the above steps, click “Next”. 5.3.0.10 Run Specification Next we will specify settings required to run the model. Be aware that the inputs required for any particular model may vary somewhat so there may be addition optional or required input selections available for other models. 5.3.0.11 PFT (Plant Functional Type): Niwot Ridge is temperate coniferous. Available PFTs will vary by model and some models allow multiple competing PFTs to be selected. Also select soil to control the soil parameters 5.3.0.12 Start/End Date: Select 2003/01/01 to 2006/12/31. In general, be careful to select dates for which there is available driver data. 5.3.0.13 Weather Data: For Sipnet.climna select “Use AmerifluxLBL” from the Available Meteorological Drivers. 5.3.0.14 Optional Settings: Leave all blank for demo run Email sends a message when the run is complete. Use Brown Dog will use the Brown Dog web services in order to do input file conversions. (Note: Required if you select Use NARR for Weather Data) Edit pecan.xml allows you to configure advanced settings via the PEcAn settings file Edit model config pauses the workflow after PEcAn has written all model specific settings but before the model runs are called and allows users to configure any additional settings internal to the model. Advanced Setup controls ensemble and sensitivity run settings discussed in Demo 2. Finally, click “Next” to start the model run. 5.3.0.15 Data Use Policies The last step before the run starts is to read and agree to AmeriFlux’s data policy and give a valid username. If you don’t already have an Ameriflux username, click “register here” and create one. If you selected a different data source, this step may or may not be needed: you will need to agree to a data policy if your source has one, but if it doesn’t then the run will start immediately. 5.3.0.16 If you get an error in your run If you get an error in your run as part of a live demo or class activity, it is probably simplest to start over and try changing options and re-running (e.g. with a different site or PFT), as time does not permit detailed debugging. If the source of the error is not immediately obvious, you may want to take a look at the workflow.Rout to see the log of the PEcAn workflow or the logfile.txt to see the model execution output log and then refer to the Documentation or the Chat Room for help. 5.3.0.17 Model Run Workflow 5.3.0.18 MET Process: First, PEcAn will download meteorological data based on the type of the Weather Data you chose, and process it into the specific format for the chosen model 5.3.0.19 TRAIT / META: PEcAn then estimates model parameters by performing a meta-analysis of the available trait data for a PFT. TRAIT will extract relevant trait data from the database. META performs a hierarchical Bayes meta-analysis of available trait data. The output of this analysis is a probability distribution for each model parameter. PEcAn selects the median value of this parameter as the default, but in Demo 2 we will see how PEcAn can use this parameter uncertainty to make probabilistic forecasts and assess model sensitivity and uncertainty. Errors at this stage usually indicate errors in the trait database or incorrectly specified PFTs (e.g. defining a variable twice). 5.3.0.20 CONFIG: writes model-specific settings and parameter files 5.3.0.21 MODEL: runs model. 5.3.0.22 OUTPUT: All model outputs are converted to standard netCDF format 5.3.0.23 ENSEMBLE & SENSITIVITY: If enabled post-process output for these analyses If at any point a Stage Name has the Status “ERROR” please notify the PEcAn team member that is administering the demo or feel free to do any of the following: Refer to the PEcAn Documentation for documentation Post the end of your workflow log on our Slack Channel chat Post an issue on Github. The entire PEcAn team welcomes any questions you may have! If the Finished Stage has a Status of “DONE”, congratulations! If you got this far, you have managed to run an ecosystem model without ever touching a line of code! Now it’s time to look at the results click Finished. FYI, adding a new model to PEcAn does not require modification of the model’s code, just the implementation of a wrapper function. 5.3.0.24 Output and Visualization For now focus on graphs, we will explore all of PEcAn’s outputs in more detail in Demo 02. 5.3.0.25 Graphs Select a Year and Y-axis Variable, and then click ‘Plot run/year/variable’. Initially leave the X-axis as time. Within this figure the points indicate the daily mean for the variable while the envelope encompasses the diurnal variability (max and min). Variable names and units are based on a standard netCDF format. Try looking at a number of different output variables over different years. Try changing the X-axis to look at bivariate plots of how different output variables are related to one another. Be aware that PEcAn currently runs a moving min/mean/max through bivariate plots, just as it does with time series plots. In some cases this makes more sense than others. 5.3.0.26 Alternative Visualization: R Shiny Click on Open SHINY, which will open a new browser window. The shiny app will automatically access your run’s output files and allow you to visualize all output variables as a function of time. Use the pull down menu under Variable Name to choose whichever output variable you wish to plot. 5.3.0.27 Model Run Archive Return to the output window and Click on the HISTORY button. Click on any previous run in the “ID” column to go to the current state of that run’s execution – you can always return to old runs and runs in-progress this way. The run you just did should be the more recent entry in the table. For the next analysis, make note of the ID number from your run. 5.3.0.28 Next steps 5.3.0.28.1 Analyzing model output Follow this tutorial, [Analyze Output] to learn how to open model output in R and compare to observed data 5.3.0.29 DEMO 02 Demo 02: Sensitivity and Uncertainty Analysis will show how to perform Ensemble & Sensitivity Analyses through the web interface and explore the PEcAn outputs in greater detail, including the trait meta-analysis "],["demo-2.html", "5.4 Demo 02: Sensitivity and Uncertainty Analysis", " 5.4 Demo 02: Sensitivity and Uncertainty Analysis In Demo 2 we will be looking at how PEcAn can use information about parameter uncertainty to perform three automated analyses: Ensemble Analysis: Repeat numerous model runs, each sampling from the parameter uncertainty, to generate a probability distribution of model projections. Allows us to put a confidence interval on the model Sensitivity Analysis: Repeats numerous model runs to assess how changes in model parameters will affect model outputs. Allows us to identify which parameters the model is most sensitive to. Uncertainty Analysis: Combines information about model sensitivity with information about parameter uncertainty to determine the contribution of each model parameter to the uncertainty in model outputs. Allow us to identify which parameters are driving model uncertainty. 5.4.1 Run Specification Return to the main menu for the PEcAn web interface: Click the “Start Over” button or navigate back to URL/pecan/ Repeat the steps for site selection and run specification from Demo 01, but also click on “Advanced setup”, then click Next. By clicking Advanced setup, PEcAn will first show an Analysis Menu, where we are going to specify new settings. For an ensemble analysis, increase the number of runs in the ensemble, in this case set Runs to 50. In practice you would want to use a larger ensemble size (100-5000) than we are using in the demo. The ensemble analysis samples parameters from their posterior distributions to propagate this uncertainty into the model output. PEcAn’s sensitivity analysis holds all parameters at their median value and then varies each parameter one-at-a-time based on the quantiles of the posterior distribution. PEcAn also includes a handy shortcut, which is the default behavior for the web interface, that converts a specified standard deviation into its Normal quantile equivalent (e.g. 1 and -1 are converted to 0.157 and 0.841). In this example set Sensitivity to -2,-1,1,2 (the median value, 0, occurs by default). We also can tell PEcAn which variable to run the sensitivity on. Here, set Variables to NEE, so we can compare against flux tower NEE observations. Click Next 5.4.2 Additional Outputs: The PEcAn workflow will take considerably longer to complete since we have just asked for over a hundred model runs. Once the runs are complete you will return to the output visualization page were there will be a few new outputs to explore, as well as outputs that were present earlier that we’ll explore in greater details: 5.4.2.1 Run ID: While the sensitivity and ensemble analyses synthesize across runs, you can also select individual runs from the Run ID menu. You can use the Graphs menu to visualize each individual run, or open individual runs in Shiny 5.4.2.2 Inputs: This menu shows the contents of /run which lets you look at and download: A summary file (README.txt) describing each run: location, run ID, model, dates, whether it was in the sensitivity or ensemble analysis, variables modified, etc. The model-specific input files fed into the model The jobs.sh file used to submit the model run 5.4.2.3 Outputs: This menu shows the contents of /out. A number of files generated by the underlying ecosystem model are archived and available for download. These include: Output files in the standardized netCDF ([year].nc) that can be downloaded for visualization and analysis (R, Matlab, ncview, panoply, etc) Raw model output in model-specific format (e.g. sipnet.out). Logfile.txt contains job.sh & model error, warning, and informational messages 5.4.2.4 PFTs: This menu shows the contents of /pft. There is a wide array of outputs available that are related to the process of estimating the model parameters and running sensitivity/uncertainty analyses for a specific Plant Functional Type. TRAITS: The Rdata files trait.data.Rdata and jagged.data.Rdata are, respectively, the available trait data extracted from the database that was used to estimate the model parameters and that same data cleaned and formatted for the statistical code. The list of variables that are queried is determined by what variables have priors associated with them in the definition of the PFTs. Priors are output into prior.distns.Rdata. Likewise, the list of species that are associated with a PFT determines what subset of data is extracted out of all data matching a given variable name. Demo 3 will demonstrate how a PFT can be created or modified. To look at these files in RStudio click on these files to load them into your workspace. You can further examine them in the Environment window or accessing them at the command line. For example, try typing names(trait.data) as this will tell you what variables were extracted, names(trait.data$Amax) will tell you the names of the columns in the Amax table, and summary(trait.data$Amax) will give you summary data about the Amax values. META-ANALYSIS: *.bug: The evaluation of the meta-analysis is done using a Bayesian statistical software package called JAGS that is called by the R code. For each trait, the R code will generate a [trait].model.bug file that is the JAGS code for the meta-analysis itself. This code is generated on the fly, with PEcAn adding or subtracting the site, treatment, and greenhouse terms depending upon the presence of these effects in the data itself. If the <random.effects> tag is set to FALSE then all random effects will be turned off even if there are multiple sites. meta-analysis.log contains a number of diagnostics, including the summary statistics of the model, an assessment of whether the posterior is consistent with the prior, and the status of the Gelman-Brooks-Rubin convergence statistic (which is ideally 1.0 but should be less than 1.1). ma.summaryplots.*.pdf are collections of diagnostic plots produced in R after the above JAGS code is run that are useful in assessing the statistical model. Open up one of these pdfs to evaluate the shape of the posterior distributions (they should generally be unimodal), the convergence of the MCMC chains (all chains should be mixing well from the same distribution), and the autocorrelation of the samples (should be low). traits.mcmc.Rdata contains the raw output from the statistical code. This includes samples from all of the parameters in the meta-analysis model, not just those that feed forward to the ecosystem, but also the variances, fixed effects, and random effects. post.distns.Rdata stores a simple tables of the posterior distributions for all model parameters in terms of the name of the distribution and its parameters. posteriors.pdf provides graphics showing, for each model parameter, the prior distribution, the data, the smoothed histogram of the posterior distribution (labeled post), and the best-fit analytical approximation to that smoothed histogram (labeled approx). Open posteriors.pdf and compare the posteriors to the priors and data SENSITIVITY ANALYSIS sensitivity.analysis.[RunID].[Variable].[StartYear].[EndYear].pdf shows the raw data points from univariate one-at-a-time analyses and spline fits through the points. Open this file to determine which parameters are most and least sensitive UNCERTAINTY ANALYSIS variance.decomposition.[RunID].[Variable].[StartYear].[EndYear].pdf, contains three columns, the coefficient of variation (normalized posterior variance), the elasticity (normalized sensitivity), and the partial standard deviation of each model parameter. Open this file for BOTH the soil and conifer PFTS and answer the following questions: The Variance Decomposition graph is sorted by the variable explaining the largest amount of variability in the model output (right hand column). From this graph identify the top-tier parameters that you would target for future constraint. A parameter can be important because it is highly sensitive, because it is highly uncertain, or both. Identify parameters in your output that meet each of these criteria. Additionally, identify parameters that are highly uncertain but unimportant (due to low sensitivity) and those that are highly sensitive but unimportant (due to low uncertainty). Parameter constraints could come from further literature synthesis, from direct measurement of the trait, or from data assimilation. Choose the parameter that you think provides the most efficient means of reducing model uncertainty and propose how you might best reduce uncertainty in this process. In making this choice remember that not all processes in models can be directly observed, and that the cost-per-sample for different measurements can vary tremendously (and thus the parameter you measure next is not always the one contributing the most to model variability). Also consider the role of parameter uncertainty versus model sensitivity in justifying your choice of what parameters to constrain. 5.4.2.5 PEcAn Files: This menu shows the contents of the root workflow folder that are not in one of the folders indicated above. It mostly contains log files from the PEcAn workflow that are useful if the workflow generates an error, and as metadata & provenance (a detailed record of how data was generated). STATUS gives a summary of the steps of the workflow, the time they took, and whether they were successful pecan.*.xml are PEcAn settings files workflow.R is the workflow script workflow.Rout is the corresponding log file samples.Rdata contains the parameter values used in the runs. This file contains two data objects, sa.samples and ensemble.samples, that are the parameter values for the sensitivity analysis and ensemble runs respectively sensitivity.output.[RunID].[Variable].[StartYear].[EndYear].Rdata contains the object sensitivity.output which is the model outputs corresponding to the parameter values in sa.samples. ENSEMBLE ANALYSIS ensemble.Rdata contains contains the object ensemble.output, which is the model predictions at the parameter values given in ensemble.samples. ensemble.analysis.[RunID].[Variable].[StarYear].[EndYear].pdf contains the ensemble prediction as both a histogram and a boxplot. ensemble.ts.[RunID].[Variable].[StartYear].[EndYear].pdf contains a time-series plot of the ensemble mean, median, and 95% CI 5.4.2.6 Global Sensitivity: Shiny Navigate to URL/shiny/global-sensitivity. This app uses the output from the ENSEMBLE runs to perform a global Monte Carlo sensitivity analysis. There are three modes controlled by Output type: Pairwise looks at the relationship between a specific parameter (X) and output (Y) All parameters looks at how all parameters affect a specific output (Y) All variables looks at how all outputs are affected by a specific parameter(X) In all of these analyses, the app also fits a linear regression to these scatterplots and reports a number of summary statistics. Among these, the slope is an indicator of global sensitivity and the R2 is an indicator of the contribution to global uncertainty 5.4.2.7 Next Steps The next set of tutorials will focus on the process of data assimilation and parameter estimation. The next two steps are in “.Rmd” files which can be viewed online. 5.4.2.8 Assimilation ‘by hand’ Explore how model error changes as a function of parameter value (i.e. data assimilation ‘by hand’) 5.4.2.9 MCMC Concepts Explore Bayesian MCMC concepts using the photosynthesis module 5.4.2.10 More info about tools, analyses, and specific tasks… Additional information about specific tasks (adding sites, models, data; software updates; etc.) and analyses (e.g. data assimilation) can be found in the PEcAn documentation If you encounter a problem with PEcAn that’s not covered in the documentation, or if PEcAn is missing functionality you need, please search known bugs and issues, submit a bug report, or ask a question in our chat room. Additional questions can be directed to the project manager "],["othervignettes.html", "5.5 Other Vignettes", " 5.5 Other Vignettes 5.5.1 Simple Model-Data Comparisons 5.5.1.1 Author: Istem Fer, Tess McCabe In this tutorial we will compare model outputs to data outside of the PEcAn web interface. The goal of this is to demonstrate how to perform additional analyses using PEcAn’s outputs. To do this you can download each of the Output files, and then perform the analyses using whatever software you prefer, or you can perform analyses directly on the PEcAn server itself. Here we’ll be analyzing model outputs in R using a browser-based version of RStudio that’s installed on the server 5.5.1.2 Starting RStudio Server Open RStudio Server in a new window at URL/rstudio The username is carya and the password is illinois. To open a new R script click File > New File > R Script Use the Files browser on the lower right pane to find where your run(s) are located All PEcAn outputs are stored in the output folder. Click on this to open it up. Within the outputs folder, there will be one folder for each workflow execution. For example, click to open the folder PEcAn_99000000001 if that’s your workflow ID A workflow folder will have a few log and settings files (e.g. pecan.xml) and the following subfolders run contains all the inputs for each run out contains all the outputs from each run pft contains the parameter information for each PFT Within both the run and out folders there will be one folder for each unique model run, where the folder name is the run ID. Click to open the out folder. For our simple case we only did one run so there should be only one folder (e.g. 99000000001). Click to open this folder. Within this folder you will find, among other things, files of the format .nc. Each of these files contains one year of model output in the standard PEcAn netCDF format. This is the model output that we will use to compare to data. 5.5.1.3 Read in settings From an XML file ## Read in the xml settings<-PEcAn.settings::read.settings("~/output/PEcAn_99000000001/pecan.CONFIGS.xml") ## To read in the model output runid<-as.character(read.table(paste(settings$outdir, "/run/","runs.txt", sep=""))[1,1]) # Note: if you are using an xml from a run with multiple ensembles this line will provide only the first run id outdir<- paste(settings$outdir,"/out/",runid,sep= "") start.year<-as.numeric(lubridate::year(settings$run$start.date)) end.year<-as.numeric(lubridate::year(settings$run$end.date)) site.id<-settings$run$site$id File_path<-"~/output/dbfiles/AmerifluxLBL_site_0-772/AMF_US-NR1_BASE_HH_9-1.csv" ## Open up a connection to The Bety Database con <- PEcAn.DB::db.open(settings$database$bety) 5.5.1.4 Read in model output from specific variables model_vars<-c("time", "NEE") #varibles being read model <- PEcAn.utils::read.output(runid,outdir,start.year, end.year, model_vars,dataframe=TRUE) The arguments to read.output are the run ID, the folder where the run is located, the start year, the end year, and the variables being read. The README file in the Input file dropdown menu of any successful run lists the run ID, the output folder, and the start and end year. 5.5.1.5 Compare model to flux observations First load up the observations and take a look at the contents of the file File_format<-PEcAn.DB::query.format.vars(bety = con, format.id = 5000000002) #This matches the file with a premade "format" or a template that describes how the information in the file is organized site<-PEcAn.DB::query.site(site.id, con) #This tells PEcAn where the data comes from observations<-PEcAn.benchmark::load_data(data.path = File_path, format= File_format, time.row = File_format$time.row, site = site, start_year = start.year, end_year = end.year) #This will throw an error that not all of the units can be converted. That's ok, as the units of the varibles of interest (NEE) are being converted. File_Path refers to where you stored your observational data. In this example the default file path is an Ameriflux dataset from Niwot Ridge. File_format queries the database for the format your file is in. The default format ID “5000000002” is for csv files downloaded from the Ameriflux website. You could query for diffent kinds of formats that exist in bety or make your own. Here 772 is the database site ID for Niwot Ridge Forest, which tells pecan where the data is from and what time zone to assign any time data read in. Second apply a conservative u* filter to observations observations$NEE[observations$UST<0.2]<-NA Third align model output and observations aligned_dat = PEcAn.benchmark::align_data(model.calc = model, obvs.calc = observations, var ="NEE", align_method = "match_timestep") When we aligned the data, we got a dataframe with the variables we requested in a \\(NEE.m\\) and a \\(NEE.o\\) format. The \\(.o\\) is for observations, and the \\(.m\\) is for model. The posix column allows for easy plotting along a timeseries. Fourth, plot model predictions vs. observations and compare this to a 1:1 line ## predicted vs observed plot plot(aligned_dat$NEE.m, aligned_dat$NEE.o) abline(0,1,col="red") ## intercept=0, slope=1 Fifth, calculate the Root Mean Square Error (RMSE) between the model and the data rmse = sqrt(mean((aligned_dat$NEE.m-aligned_dat$NEE.o)^2,na.rm = TRUE)) na.rm makes sure we don’t include missing or screened values in either time series. Finally, plot time-series of both the model and data together ## plot aligned data plot(aligned_dat$posix, aligned_dat$NEE.o, type="l") lines(aligned_dat$posix,aligned_dat$NEE.m, col = "red") Bonus How would you compare aggregated data? Try RMSE against monthly NEE instead of half-hourly. In this case, first average the values up to monthly in the observations. Then, use align_data to match the monthly timestep in model output. NOTE: Align_data uses two seperate alignment function, match_timestep and mean_over_larger_timestep. Match_timestep will use only that data that is present in both the model and the observation. This is helpful for sparse observations. Mean_over_larger_timestep aggregates the values over the largest timestep present. If you were to look at averaged monthly data, you would use mean_over_larger_timestep. monthlyNEEobs<-aggregate(observations, by= list(month(observations$posix)), simplify=TRUE, FUN =mean, na.rm= TRUE) plottable<-align_data(model.calc = model, obvs.calc = monthlyNEEobs, align_method = "mean_over_larger_timestep", var= "NEE") head(plottable) 5.5.2 Data Assimilation Concepts The goal of this tutorial is to help you gain some hands-on familiarity with some of the concepts, tools, and techniques involved in Bayesian Calibration. As a warm-up to more advanced approaches to model-data fusion involving full ecosystem models, this example will focus on fitting the Farquhar, von Caemmerer, and Berry (1980) photosynthesis model [FvCB model] to leaf-level photosynthetic data. This is a simple, nonlinear model consisting of 3 equations that models net carbon assimilation, \\(A^{(m)}\\), at the scale of an individual leaf as a function of light and CO2. \\[A_j = \\frac{\\alpha Q}{\\sqrt{1+(\\alpha^2 Q^2)/(Jmax^2)}} \\frac{C_i- \\Gamma}{4 C_i + 8 \\Gamma}\\] \\[A_c = V_{cmax} \\frac{C_i - \\Gamma}{C_i+ K_C (1+[O]/K_O) }\\] \\[A^{(m)} = min(A_j,A_c) - r\\] The first equation \\(A_j\\) describes the RUBP-regeneration limited case. In this equation the first fraction is a nonrectangular hyperbola predicting \\(J\\), the electron transport rate, as a function of incident light \\(Q\\), quantum yield \\(\\alpha\\), and the assymptotic saturation of \\(J\\) at high light \\(J_{max}\\). The second equation, \\(A_c\\), describes the Rubisco limited case. The third equation says that the overall net assimilation is determined by whichever of the two above cases is limiting, minus the leaf respiration rate, \\(r\\). To keep things simple, as a Data Model (a.k.a. Likelihood or Cost Function) we’ll assume that the observed leaf-level assimilation \\(A^{(o)}\\) is Normally distributed around the model predictions with observation error \\(\\tau\\). \\[A^{(o)} \\sim N(A^{(m)},\\tau)\\] To fit this model to data we’re going to rely on a piece of statistical software known as JAGS. The above model would be written in JAGS as: model{ ## Priors Jmax ~ dlnorm(4.7,2.7) ## maximum electron transport rate prior alpha~dnorm(0.25,100) ##quantum yield (mol electrons/mole photon) prior vmax ~dlnorm(4.6,2.7) ## maximum rubisco capacity prior r ~ dlnorm(0.75,1.56) ## leaf respiration prior cp ~ dlnorm(1.9,2.7) ## CO2 compensation point prior tau ~ dgamma(0.1,0.1) for(i in 1:n){ ## electron transport limited Aj[i]<-(alpha*q[i]/(sqrt(1+(alpha*alpha*q[i]*q[i])/(Jmax*Jmax))))*(pi[i]-cp)/(4*pi[i]+8*cp) ## maximum rubisco limited without covariates Ac[i]<- vmax*(pi[i]-cp)/(pi[i]+Kc*(1+po/Ko)) Am[i]<-min(Aj[i], Ac[i]) - r ## predicted net photosynthesis Ao[i]~dnorm(Am[i],tau) ## likelihood } } The first chunk of code defines the prior probability distributions. In Bayesian inference every unknown parameter that needs to be estimated is required to have a prior distribution. Priors are the expression of our belief about what values a parameter might take on prior to observing the data. They can arise from many sources of information (literature survey, meta-analysis, expert opinion, etc.) provided that they do not make use of the data that is being used to fit the model. In this particular case, the priors were defined by Feng and Dietze 2013. Most priors are lognormal or gamma, which were choosen because most of these parameters need to be positive. After the priors is the Data Model, which in JAGS needs to be implemented as a loop over every observation. This is simply a codified version of the earlier equations. Table 1: FvCB model parameters in the statistical code, their symbols in equations, and definitions Parameter Symbol Definition alpha0 \\(\\alpha\\) quantum yield (mol electrons/mole photon) Jmax \\(J_{max}\\) maximum electron transport cp \\(\\Gamma\\) CO2 compensation point vmax0 \\(V_{cmax}\\) maximum Rubisco capacity (a.k.a Vcmax) r \\(R_d\\) leaf respiration tau \\(\\tau\\) residual precision q \\(Q\\) PAR pi \\(C_i\\) CO2 concentration 5.5.2.1 Fitting the model To begin with we’ll load up an example A-Ci and A-Q curve that was collected during the 2012 edition of the Flux Course at Niwot Ridge. The exact syntax below may be a bit confusing to those unaccustomed to R, but the essence is that the filenames line is looking up where the example data is stored in the PEcAn.photosynthesis package and the dat line is loading up two files (once A-Ci, the other A-Q) and concatanating them together. library(PEcAn.photosynthesis) ### Load built in data filenames <- system.file("extdata", paste0("flux-course-3",c("aci","aq")), package = "PEcAn.photosynthesis") dat<-do.call("rbind", lapply(filenames, read_Licor)) ## Simple plots aci = as.character(dat$fname) == basename(filenames[1]) plot(dat$Ci[aci],dat$Photo[aci],main="ACi") plot(dat$PARi[!aci],dat$Photo[!aci],main="AQ") In PEcAn we’ve written a wrapper function, \\(fitA\\), around the statistical model discussed above, which has a number of other bells and whistles discussed in the PEcAn Photosynthesis Vignette. For today we’ll just use the most basic version, which takes as arguments the data and the number of MCMC iterations we want to run. fit <- fitA(dat,model=list(n.iter=10000)) 5.5.2.2 What’s going on Bayesian numerical methods for model calibration are based on sampling parameter values from the posterior distribution. Fundamentally what’s returned is a matrix, with the number of iterations as rows and the number of parameters as columns, which are samples from the posterior distribution, from which we can approximate any quantity of interest (mean, median, variance, CI, etc.). The following plots follow the trajectory of two correlated parameters, Jmax and alpha. In the first figure, arrows show the first 10 iterations. Internally JAGS is choosing between a variety of different Bayesian sampling methods (e.g. Metropolis-Hasting, Gibbs sampling, slice sampling, rejection sampling, etc) to draw a new value for each parameter conditional on the current value. After just 10 steps we don’t have a good picture of the overall posterior, but it should still be clear that the sampling is not a complete random walk. params <- as.matrix(fit$params) xrng = range(fit$params[,"alpha0"]) yrng = range(fit$params[,"Jmax0"]) n = 1:10 plot(params[n,"alpha0"],params[n,"Jmax0"],type='n',xlim=xrng,ylim=yrng) arrows(params[n[-10],"alpha0"],params[n[-10],"Jmax0"],params[n[-1],"alpha0"],params[n[-1],"Jmax0"],length=0.125,lwd=1.1) After 100 steps, we can see a cloud start to form, with occassional wanderings around the periphery. n = 1:100 plot(params[n,"alpha0"],params[n,"Jmax0"],type='l',xlim=xrng,ylim=yrng) After \\(nrow(params)\\) steps what we see is a point cloud of samples from the joint posterior distribution. When viewed sequentially, points are not independent, but we are interested in working with the overall distribution, where the order of samples is not important. n = 1:nrow(params) plot(params[n,"alpha0"],params[n,"Jmax0"],type='p',pch="+",cex=0.5,xlim=xrng,ylim=yrng) 5.5.2.3 Evaluating the model output A really important aspect of Bayesian inference is that the output is the joint posterior probability of all the parameters. This is very different from an optimization approach, which tries to find a single best parameter value. It is also different from estimating the independent posterior probabilities of each parameter – Bayesian posteriors frequently have strong correlation among parameters for reasons having to do both with model structure and the underlying data. The model we’re fitting has six free parameters, and therefore the output matrix has 6 columns, of which we’ve only looked at two. Unfortunately it is impossible to visualize a 6 dimensional parameter space on a two dimensional screen, so a very common practice (for models with a small to medium number of parameters) is to look at all pairwise scatterplots. If parameters are uncorrelated we will typically see oval shaped clouds that are oriented in the same directions as the axes. For parameters with linear correlations those clouds will be along a diagonal. For parameters with nonlinear trade-offs the shapes of the parameter clouds can be more complex, such as the banana-shaped or triangular. For the FvCB model we see very few parameters that are uncorrelated or have simple linear correlations, a fact that we should keep in mind when interpreting individual parameters. pairs(params,pch=".") The three most common outputs that are performed on almost all Bayesian analyses are to look at the MCMC chains themselves, the marginal distributions of each parameter, and the overall summary statistics. The ‘trace’ diagrams below show the history of individual parameters during the MCMC sampling. There are different color lines that represent the fact that JAGS ran the MCMC multiple times, with each run (i.e. each color) being refered to as a different \\(chain\\). It is common to run multiple chains in order to assess whether the model, started from different points, consistently converges on the same answer. The ideal trace plot looks like white noise with all chains in agreement. The ‘density’ figures represent smoothed versions of the marginal distributions of each parameter. The tick marks on the x-axis are the actual samples. You will note that some posteriors will look approximately Normal, while others may be skewed or have clearly defined boundaries. On occassion there will even be posteriors that are multimodal. There is no assumption built into Bayesian statistics that the posteriors need be Normal, so as long as an MCMC has converged this diversity of shapes is valid. [note: the most common cause of multi-modal posteriors is a lack of convergence] Finally, the summary table reports, for each parameter, a mean, standard deviation, two variants of standard error, and standard quantile estimates (95% CI, interquartile, and median). The standard deviation of the posterior is a good summary statistic about how uncertain we are about a parameter. The Naive SE is the traditonal \\(\\frac{SD}{\\sqrt{n}}\\), which is an estimate of the NUMERICAL accuracy in our estimate of the mean. As we run the MCMC longer (i.e. take more samples), we get an answer that is numerically more precise (SE converges to 0) but the uncertainty in the parameter (i.e. SD) stays the same because that’s determined by the sample size of the DATA not the length of the MCMC. Finally, the Time-series SE is a variant of the SE calculation that accounts for the autocorrelation in the MCMC samples. In practice is is therefore more appropriate to use this term to assess numerical accuracy. plot(fit$params,auto.layout = FALSE) ## MCMC diagnostic plots summary(fit$params) ## parameter estimates Assessing the convergence of the MCMC is first done visually, but more generally the use of statistical diagnostics to assess convergence is highly encouraged. There are a number of metrics in the literature, but the most common is the Gelman-Brooks-Rubin statistic, which compare the variance within each chain to the variance across chains. If the chains have converged then this quantity should be 1. Values less than 1.05 are typically considered sufficient by most statisticians, but these are just rules-of-thumb. gelman.plot(fit$params,auto.layout = FALSE) gelman.diag(fit$params) As with any modeling, whether statistical or process-based, another common diagnostic is a predicted vs observed plot. In a perfect model the data would fall along the 1:1 line. The deviations away from this line are the model residuals. If observations lie along a line other than the 1:1 this indicates that the model is biased in some way. This bias is often assessed by fitting a linear regression to the points, though two important things are noteworthy about this practice. First, the \\(R^2\\) and residual error of this regression are not the appropriate statistics to use to assess model performance (though you will frequently find them reported incorrectly in the literature). The correct \\(R^2\\) and residual error (a.k.a Root Mean Square Error, RMSE) are based on deviations from the 1:1 line, not the regression. The code below shows these two terms calculated by hand. The second thing to note about the regression line is that the standard regression F-test, which assesses deviations from 0, is not the test you are actually interested in, which is whether the line differs from 1:1. Therefore, while the test on the intercept is correct, as this value should be 0 in an unbiased model, the test statistic on the slope is typically of less interest (unless your question really is about whether the model is doing better than random). However, this form of bias can easily be assessed by looking to see if the CI for the slope overlaps with 1. ## predicted vs observed plot par(mfrow=c(1,1)) mstats = summary(fit$predict) pmean = mstats$statistics[grep("pmean",rownames(mstats$statistics)),1] plot(pmean,dat$Photo,pch="+",xlab="Predicted A",ylab = "Observed A") abline(0,1,col=2,lwd=2) bias.fit = lm(dat$Photo~pmean) abline(bias.fit,col=3,lty=2,lwd=2) legend("topleft",legend=c("1:1","regression"),lwd=2,col=2:3,lty=1:2) summary(bias.fit) RMSE = sqrt(mean((pmean-dat$Photo)^2)) RMSE R2 = 1-RMSE^2/stats::var(dat$Photo) R2 confint(bias.fit) In the final set of plots we look at the actual A-Ci and A-Q curves themselves. Here we’ve added two interval estimates around the curves. The CI captures the uncertainty in the parameters and will asympotically shrink with more and more data. The PI (predictive interval) includes the parameter and residual error. If our fit is good then 95% PI should thus encompass at least 95% of the observations. That said, as with any statistical model we want to look for systematic deviations in the residuals from either the mean or the range of the PI. ## Response curve plot_photo(dat,fit) Note: on the last figure you will get warnings about “No ACi” and “No AQ” which can be ignored. These are occuring because the file that had the ACi curve didn’t have an AQ curve, and the file that had the AQ curve didn’t have an ACi curve. 5.5.2.4 Additional information There is a more detailed R Vignette on the use of the PEcAn photosynthesis module available in the PEcAn Repository. 5.5.2.5 Citations Dietze, M.C. (2014). Gaps in knowledge and data driving uncertainty in models of photosynthesis. Photosynth. Res., 19, 3–14. Farquhar, G., Caemmerer, S. & Berry, J.A. (1980). A biochemical model of photosynthetic CO2 assimilation in leaves of C3 species. Planta, 149, 78–90. Feng, X. & Dietze, M.C. (2013). Scale dependence in the effects of leaf ecophysiological traits on photosynthesis: Bayesian parameterization of photosynthesis models. New Phytol., 200, 1132–1144. 5.5.3 Parameter Data Assimilation 5.5.3.1 Objectives Gain hands-on experience in using Bayesian MCMC techniques to calibrate a simple ecosystem model using parameter data assimilation (PDA) Set up and run a PDA in PEcAn using model emulation technique, assimilating NEE data from Niwot Ridge Examine outputs from a PDA for the SIPNET model and evaluation of the calibrated model against i) data used to constrain model, ii) additional data for the same site 5.5.3.2 Larger Context Parameter data assimilation (PDA) occurs as one step in the larger process of model calibration, validation, and application. The goal of PDA is to update our estimates of the posterior distributions of the model parameters using data that correspond to model outputs. This differs from our previous use of PEcAn to constrain a simple model using data that map directly to the model parameters. Briefly, the recommended overall approach for model calibration and validation consists of the following steps: Assemble and process data sets required by the model as drivers Perform an initial test-run of the model as a basic sanity check Were there errors in drivers? (return to 1) Is the model in the same ballpark as the data? Construct priors for model parameters Collect/assemble the data that can be used to constrain model parameters and outputs Meta-analysis Sensitivity analysis (SA) Variance Decomposition (VD) Determine what parameters need further constraint Does this data exist in the literature? (repeat 4-8) Can I collect this data in the field? (repeat 4-8) Ensemble Analysis Is reality within the range of the uncertainty in the model? Evaluate/estimate uncertainties in the data Parameter Data Assimilation: Propose new parameter values Evaluate L(data | param) & prior(param) Accept or reject the proposed parameter values Repeat many times until a histogram of accepted parameter values approximates the true posterior distribution. Model evaluation [preferably ensemble based] Against data used to constrain model Against additional data for this site Same variable, different time Different variables Against data at a new site Do I need more data? Repeat 4-9 (direct data constraint) or 6-11 (parameter data assimilation). Application [preferably ensemble forecast or hindcast] 5.5.3.3 Connect to Rstudio Today, we’re again going to work mostly in Rstudio, in order to easily edit advanced PEcAn settings and browse files. So if you haven’t already, connect now to the Rstudio server on your VM ([URL]/rstudio). This tutorial assumes you have successfully completed an ensemble and a sensitivity analysis (Demo 2) before. 5.5.3.4 Defining variables The following variables need to be set specific to the site being run and the workflow being run workflow_id <- 99000000002 ## comes from the History table, your successful ensemble run's workflow ID ## from URL/bety/inputs. ## Search by Ameriflux ID (e.g. US-NR1) ## Find the "plain" site record (no CF or model name) that's on your server ## (probably last in the list) ## Click on the magnifying glass icon then look under "View Related Files" datadir <- "/home/carya/output/dbfiles/AmerifluxLBL_site_0-772/" ## where PEcAn is saving output (default OK on VM) outdir <- "/home/carya/output/" 5.5.3.5 Initial Ensemble Analysis A good place to start when thinking about a new PDA analysis is to look at the current model fit to observed data. In fact, we want to compare data to a full ensemble prediction from the model. This is important because our current parameter distributions will be the priors for PDA. While the analysis will translate these priors into more optimal (in terms of producing model output that matches observations) and more confident (i.e. narrower) posterior distributions, these results are inherently constrained by the current parameter distributions. Thus, if reality falls far outside the prior ensemble confidence interval (which reflects the current uncertainty of all model parameters), data assimilation will not be able to fix this. In such cases, the prior parameter estimates must already be over-constrained, or there are structural errors in the model itself that need fixing. To begin, let’s load up some NEE observations so that we can plot them along with our ensemble predictions. In the code below the elements in bold may vary depending on site and your previous runs. library(PEcAn.all) # read settings settings <- read.settings(file.path(outdir,paste0("PEcAn_",workflow_id),"pecan.CONFIGS.xml")) # open up a DB connection con <- PEcAn.DB::db.open(settings$database$bety) # Fill out the arguments needed by load_data function # read file format information format <- PEcAn.DB::query.format.vars(bety = con, format.id = 5000000002) start_year <- lubridate::year(settings$run$start.date) end_year <- lubridate::year(settings$run$end.date) vars.used.index <- which(format$vars$bety_name %in% c("NEE", "UST")) obs <-PEcAn.benchmark::load_data(data.path = file.path(datadir, "AMF_US-NR1_BASE_HH_9-1.csv"), format = format, start_year = start_year, end_year = end_year, site = settings$run$site, vars.used.index = vars.used.index, time.row = format$time.row) obs$NEE[obs$UST<0.4] <- NA ## U* filter NEEo <- obs$NEE Now let’s load up our ensemble outputs from the previous ensemble analysis (Demo 2) and plot our ensemble predictions against our NEE observations. # load outputs, try not to delete prePDA ensemble output filename from your environment prePDA_ensemble_output_file <- file.path(outdir,paste0("PEcAn_",workflow_id, "/ensemble.ts.", settings$ensemble$ensemble.id, ".NEE.2003.2006.Rdata")) load(prePDA_ensemble_output_file) # calculate CI pre_pda_ens <- ensemble.ts[["NEE"]] preCI <- apply(pre_pda_ens, 2, quantile, c(0.025, 0.5, 0.975), na.rm = TRUE) # plot model ensemble ymin <- min(min(c(preCI, NEEo), na.rm = TRUE)) ymax <- max(max(c(preCI, NEEo), na.rm = TRUE)) plot(preCI[2,], ylim = c(ymin, ymax), lwd = 2, xlab = "time", ylab = "NEE", main = "pre-PDA model ensemble vs data", type = "n") prepoly <- 1:dim(preCI)[2] polygon(c(prepoly, rev(prepoly)), c(preCI[3,], rev(preCI[1,])), col='khaki', border=NA) # add data points(NEEo, pch = ".", col= adjustcolor("purple",alpha.f=0.5)) legend("topright", legend=c("Data","Pre-PDA Model"), pch=c(15,15), col=c("purple","khaki")) When interpreting your results it is important to remember the difference between a confidence interval, which just includes parameter uncertainties, and a predictive interval, which includes parameter and residual uncertainties. Your ensemble analysis plot illustrates the former—i.e., the confidence in the mean NEE. By contrast, the data reflect both changes in mean NEE, and random variability. As such, we can’t expect all the data to fall within the CI; in fact, if we had unlimited data to constrain mean NEE, the CI would collapse to a single line and none of the data would be contained! However, your plot will give you an idea of how much uncertainty there is in your model currently, and help to identify systematic errors like bias (values consistently too high or low) or poorly represented seasonal patterns. 5.5.3.6 Questions: Does your ensemble agree well with the data? If so, how much room for improvement is there, in terms of tightening the CI? If not, what are the greatest discrepancies? What are some of the problems (with model, data, and/or PEcAn) that might explain the data-model disparity you see? 5.5.3.7 Choosing Parameters Beyond exploratory exercises, the first step of PDA analysis is to choose the model parameters you will target for optimization. PDA is computationally expensive (even when using an emulator), and the cost increases exponentially with the number of parameters targeted. The number you can handle in any given analysis completely depends on the complexity of the model and your available computational resources, but in practice it’s going to be rather small (~1–10) relative to the large number of parameters in a mechanistic ecosystem model (~10–100). Given this limitation, it is important to target parameters that can contribute substantially to improving model fit. If you recall, identifying those parameters was the goal of the uncertainty analysis you conducted previously, in the second PEcAn demo. Let’s revisit that analysis now. Open your variance decomposition graph from Demo 2 From this figure decide which variables you will target with PDA. As noted, an obvious criterion is that the parameter should be contributing a large amount of uncertainty to the current model, because otherwise it simply can’t change the model output much no matter how much you try to optimize it. But there are other considerations too. For example, if two parameters have similar or competing roles in the model, you may have trouble optimizing both simultaneously. In practice, there will likely be some guess-and-testing involved, though a good understanding of how the model works will help. It may also help to look at the shape of the Sensitivity responses and details of model fit to data (your ensemble analysis from the previous section). For the purposes of this demo, choose eight to ten parameters (in total, if you have more than one PFT) that contribute high uncertainty to model output and/or seem like good choices for some other rational reason. 5.5.3.8 Questions: Which parameters did you choose, and why? 5.5.3.9 Editing PEcAn settings Now let’s add settings to tell PEcAn how to run the PDA with emulator, we will come to the details of model emulation later. Open up the pecan.CONFIGS.xml file you located previously, and choose File > Save as... from the menu to save a new copy as pecan.PDA.xml. Now add the block of XML listed below to the file, immediately after the line. Check and fill in the parts corresponding to your run when necessary. In this block, use the <param.names><param> tags to identify the parameters you’ve chosen for PDA (it’s up to you to choose the number of parameters you want to constrain, then you can set the <n.knot> to be >= 10 per parameter you choose, e.g. 200 knots for 10 parameters). Here, you need to use PEcAn’s standard parameter names, which are generally not the same as what’s printed on your variance decomposition graph. To find your parameters look at the row names in the prior.distns.csv file for each PFT under the PFT pulldown menu. Insert the variable name (exactly, and case sensitive) into the <param> tags of the XML code below. In addition, you may need to edit <inputs><file><path>, depending on the site and year you ran previously. The rest of the settings control options for the PDA analysis (how long to run, etc.), and also identify the data to be used for assimilation. For more details, see the assim.batch vignette on the PEcAn GitHub page (https://goo.gl/9hYVPQ). <?xml version="1.0"?> <-- These lines are already in there. Don't duplicate them, <pecan> <-- just paste the <assim.batch> block below right after them. <assim.batch> <method>emulator</method> <n.knot>160</n.knot> <-- FILL IN <iter>25000</iter> <chain>3</chain> <param.names> <soil> <param>YOUR_PFT_1_PARAM_1</param> <-- FILL IN <param>YOUR_PFT_1_PARAM_2</param> <-- FILL IN </soil> <temperate.coniferous> <param>YOUR_PFT_2_PARAM_1</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_2</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_3</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_4</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_5</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_6</param> <-- FILL IN </temperate.coniferous> </param.names> <jump> <adapt>100</adapt> <adj.min>0.1</adj.min> <ar.target>0.3</ar.target> </jump> <inputs> <file> <path> <path>/home/carya/output/dbfiles/AmerifluxLBL_site_0-772/AMF_US-NR1_BASE_HH_9-1.csv</path> </path> <format>5000000002</format> <input.id>1000011238</input.id> <-- FILL IN, from BETY inputs table, this is *NOT* the workflow ID <likelihood>Laplace</likelihood> <variable.name> <variable.name>NEE</variable.name> <variable.name>UST</variable.name> </variable.name> <variable.id>297</variable.id> </file> </inputs> </assim.batch> Once you’ve made and saved the changes to your XML, load the file and check that it contains the new settings: settings <- read.settings(file.path(outdir,paste0("PEcAn_",workflow_id),"pecan.PDA.xml")) settings$assim.batch If the printed list contains everything you just added to pecan.PDA.xml, you’re ready to proceed. 5.5.3.10 Investigating PEcAn function pda.emulator (optional) Before we run the data assimilation, let’s take a high-level look at the organization of the code. Use the Rstudio file browser to open up ~/pecan/modules/assim.batch/R/pda.emulator.R. This code works in much the same way as the pure statistical models that we learned about earlier in the week, except that the model being fit is a statistical model that emulates a complicated process-based computer simulation (i.e., an ecosystem model). We could have directly used the ecosystem model (indeed PEcAn’s other PDA functions perform MCMC by actually running the ecosystem model at each iteration, see pda.mcmc.R script as an example), however, this would require a lot more computational time than we have today. Instead here we will use a technique called model emulation. This technique allows us to run the model for a relatively smaller number of times with parameter values that have been carefully chosen to give a good coverage of parameter space. Then we can interpolate the likelihood calculated for each of those runs to get a surface that “emulates” the true likelihood and perform regular MCMC, except instead of actually running the model on every iteration to get a likelihood, this time we will just get an approximation from the likelihood emulator. The general algorithm of this method can be further expressed as: Propose initial parameter set sampling design Run full model for each parameter set Evaluate the likelihoods Construct emulator of multivariate likelihood surface Use emulator to estimate posterior parameter distributions (Optional) Refine emulator by proposing new design points, goto 2) For now, we just want you to get a glimpse at the overall structure of the code, which is laid out in the comment headers in pda.emulator(). Most of the real work gets done by the functions this code calls, which are all located in the file ~/pecan/modules/assim.batch/R/pda.utils.R and the MCMC will be performed by the mcmc.GP() function in ~/pecan/modules/emulator/R/minimize.GP.R. To delve deeper into how the code works, take a look at these files when you have the time. 5.5.3.11 Running a demo PDA Now we can go ahead and run a data assimilation MCMC with emulator. Since you’ve already loaded the settings containing your modified <assim.batch> XML block, all you need to do to start the PDA is run pda.emulator(settings). But, in the emulator workflow, there is a bit of a time consuming step where we calculate the effective sample size of the input data, and we have already done this step for you. You could load it up and pass it to the function explicitly in order to skip this step: # load input data load("/home/carya/pda/pda_externals.Rdata") postPDA.settings <- pda.emulator(settings, external.data = inputs_w_neff) After executing the code above, you will see print-outs to the console. The code begins with loading the prior values which in this case are the posterior distributions coming from your previous meta analysis. Then, normally, it loads the observational data and carries out necessary conversions and formatting to align it with model outputs, as we did separately above, but today it will skip this step as we are passing data externally. After this step, you will see a progress bar where the actual model is run n.knot times with the proposed parameter sets and then the outputs from these runs are read. Next, this model output is compared to the specified observational data, and the likelihood is calculated using the heteroskedastic Laplacian discussed previously. Once we calculate the likelihoods, we fit an emulator which interpolates the model output in parameter space between the points where the model has actually been run. Now we can put this emulator in the MCMC algorithm instead of the model itself. Within the MCMC loop the code proposes new parameter value from a multivariate normal jump distribution. The corresponding likelihood will be approximated by the emulator and the new parameter value is accepted or rejected based on its posterior probability relative to the current value. 5.5.3.12 Outputs from PEcAn’s Parameter Data Assimilation When the PDA is finished, a number of outputs are automatically produced that are either the same as or similar to posterior outputs that we’ve seen before. These are located in the PEcAn_[workflow_id]/pft/* output directory and are identified by pda.[PFT]_[workflow_id] in the filenames: posteriors.pda.[PFT]*.pdf shows the posterior distributions resulting from your PDA trait.mcmc.pda.[PFT]*.Rdata contains all the parameter samples contained in the PDA posterior mcmc.pda.[PFT]*.Rdata is essentially the same thing in a different format mcmc.diagnostics.pda.[PFT]*.pdf shows trace plots and posterior densities for each assimilated parameter, as well as pairs plots showing all pairwise parameter correlations. Together, these files allow you to evaluate whether a completed PDA analysis has converged and how the posterior distributions compare to the priors, and to use the posterior samples in further analyses, including additional PDA. If you haven’t done so already, take a look at all of the outputs described here. 5.5.3.13 Questions: Do the diagnostic figures indicate that your likelihood at least improved over the course of the analysis? Does the MCMC appear to have converged? Are the posterior distributions well resolved? 5.5.3.14 Post-PDA analyses In addition to the outputs of the PDA itself, you may want to conduct ensemble and/or sensitivity analyses based on the posteriors of the data assimilation, in order to check progress towards improved model fit and/or changing sensitivity. For this, you need to generate new model runs based on parameters sampled from the updated (by PDA) posterior, which is a simple matter of rerunning several steps of the PEcAn workflow. The PDA you ran has automatically produced an updated XML file (pecan.pda***.xml) that includes the posterior id to be used in the next round of runs. Locate this file in your run directory and load the file for the post-pda ensemble/sensitivity analysis (if you already have the settings list in your working environment you don’t need to re-read the settings): # read post-PDA settings if you don't have them in your wotking environment # replace the *** with the ensemble id given by the workflow # postPDA.settings <- read.settings(file.path(outdir,paste0("PEcAn_", workflow_id),"pecan.pda***.xml")) # Call model specific write.configs postPDA.settings <- run.write.configs(postPDA.settings, write=postPDA.settings$database$bety$write, ens.sample.method=postPDA.settings$ensemble$method) # Let's save the settings with the new ensemble id PEcAn.settings::write.settings(settings, outputfile=paste0('pecan.pda', postPDA.settings$assim.batch$ensemble.id,'.xml')) # Start ecosystem model runs, this one takes awhile... PEcAn.workflow::start_model_runs(postPDA.settings, postPDA.settings$database$bety$write) # Get results of model runs get.results(postPDA.settings) # Repeat ensemble analysis with PDA-constrained params run.ensemble.analysis(postPDA.settings, TRUE) # let's re-load the pre-PDA ensemble outputs load(prePDA_ensemble_output_file) pre_pda_ens <- ensemble.ts[["NEE"]] # nowload the post-PDA ensemble outputs postPDA_ensemble_output_file <- file.path(outdir,paste0("PEcAn_", workflow_id, "/ensemble.ts.", postPDA.settings$ensemble$ensemble.id, ".NEE.2003.2006.Rdata")) load(postPDA_ensemble_output_file) post_pda_ens <- ensemble.ts[["NEE"]] # try changing the window value for daily, weekly, monthly smoothing later # see if this changes your model-data agreement, why? window <- 1 # no smoothing pre_pda <- t(apply(pre_pda_ens, 1, function(x) { tapply(x, rep(1:(length(x)/window + 1), each = window)[1:length(x)], mean, na.rm = TRUE)})) post_pda <- t(apply(post_pda_ens, 1, function(x) { tapply(x, rep(1:(length(x)/window + 1), each = window)[1:length(x)], mean, na.rm = TRUE)})) fobs <- tapply(NEEo, rep(1:(length(NEEo) / window + 1), each = window)[1:length(NEEo)], mean, na.rm = TRUE) # save the comparison plots to pdf pdf(file.path(outdir,paste0("PEcAn_",workflow_id),"model.data.comparison.pdf"), onefile=T, paper='A4r', height=15, width=20) # now plot the pre-PDA ensemble similar to the way we did before preCI <- apply(pre_pda, 2, quantile, c(0.025, 0.5, 0.975), na.rm = TRUE) ymin <- min(min(c(preCI, fobs), na.rm = TRUE)) ymax <- max(max(c(preCI, fobs), na.rm = TRUE)) plot(pre_pda[1,], ylim = c(ymin, ymax), lwd = 2, xlab = "time", ylab = "NEE", main = "pre-PDA vs post-PDA", type = "n") prepoly <- 1:dim(preCI)[2] polygon(c(prepoly, rev(prepoly)),c(preCI[3,], rev(preCI[1,])),col='khaki',border=NA) # plot the post-PDA ensemble postCI <- apply(post_pda, 2, quantile, c(0.025, 0.5, 0.975), na.rm = TRUE) postpoly <- 1:dim(postCI)[2] polygon(c(postpoly, rev(postpoly)),c(postCI[3,], rev(postCI[1,])),col='lightblue',border=NA) # finally let's add the data and see how we did points(fobs, pch = ".", col= adjustcolor("purple",alpha.f=0.7)) legend("topright", legend=c("Data","Pre-PDA Model", "Post-PDA Model"), pch=c(15,15,15), col=c("purple","khaki","lightblue")) dev.off() # Repeat variance decomposition to see how constraints have changed run.sensitivity.analysis(postPDA.settings) Now you can check the new figures produced by your analyses under PEcAn_[workflow_id]/pft/*/variance.decomposition.*.pdf and PEcAn_[workflow_id]/pft/*/sensitivity.analysis.*.pdf, and compare them to the previous ones. Also, take a look at the comparison of model outputs to data when we run SIPNET with pre- and post-PDA parameter (mean) values under PEcAn_[workflow_id]/model.data.comparison.pdf. 5.5.3.15 Questions: Looking at the ensemble analysis outputs in order (i.e., in order of increasing ID in the filenames), qualitatively how did the model fit to data change over the course of the analysis? Based on the final ensemble analysis, what are the major remaining discrepancies between model and data? Can you think of the processes / parameters that are likely contributing to the differences? What would be your next steps towards evaluating or improving model performance? 5.5.4 State-Variable Data Assimilation 5.5.4.1 Objectives: Assimilate tree ring estimated NPP & inventory AGB within the SIPNET model in order to: Reconcile data and model NPP & AGB estimates Constrain inferences about other ecosystem responses. 5.5.4.2 Overview: Initial Run Settings Load and match plot and increment data Estimate tree-level data uncertainties Estimate allometric relationships Estimate stand-level NPP Sample initial conditions and parameters Run Ensemble Kalman Filter 5.5.4.3 Initial Run Perform a site-level SIPNET run using the following settings Site = UNDERC Start = 01/01/1979 End = 12/31/2015 Met = NARR Check Brown Dog When the run is complete, open the pecan.xml and cut-and-paste the outdir for later use 5.5.4.4 Settings: Open the PEcAn RStudio environment back up. Set your working directory to the outdir from above setwd(outdir) and shift the file browser to that location (Files > More > Go To Working Directory) Open up the latest settings file pecan.CONFIG.xml. At the top of the file add the following tags to set the ensemble size <state.data.assimilation> <n.ensemble>35</n.ensemble> <process.variance>FALSE</process.variance> <sample.parameters>TRUE</sample.parameters> <data> <format_id>1000000040</format_id> <input.id>1000013298</input.id> </data> <state.variables> <variable> <variable.name>NPP</variable.name> <unit>MgC/ha/yr</unit> <min_value>-9999</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>AbvGrndWood</variable.name> <unit>KgC/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>KgC/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>LeafC</variable.name> <unit>m^2/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SoilMoistFrac</variable.name> <unit></unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SWE</variable.name> <unit>cm</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>Litter</variable.name> <unit>gC/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> </state.variables> <forecast.time.step>year</forecast.time.step> <start.date>1980/01/01</start.date> <end.date>2015/12/31</end.date> </state.data.assimilation> Delete the <pfts> block from the settings In the PEcAn History, go to your PDA run and open pecan.pda[UNIQUEID].xml (the one PEcAn saved for you AFTER you finished the PDA) Cut-and-paste the PDA <pfts> block into the SDA settings file Save the file as pecan.SDA.xml 5.5.4.5 Loading data If you have not done so already, clone (new) or pull (update) the PalEON Camp2016 repository Open a shell under Tools > Shell cd to go to your home directory To clone: git clone git@github.com:PalEON-Project/Camp2016.git To pull: cd Camp2016; git pull https://github.com/PalEON-Project/Camp2016.git master Open the tree-ring data assimilation workflow under Home > pecan > scripts > workflow.treering.R Run the script from the start up through the LOAD DATA section 5.5.4.6 Estimating tree-level data uncertainties One thing that is critical for data assimilation, whether it is being used to estimate parameters or state variables, is the careful consideration and treatment of the uncertainties in the data itself. For this analysis we will be using a combination of forest plot and tree ring data in order to estimate stand-level productivity. The basic idea is that we will be using the plot-sample of measured DBHs as an estimate of the size structure of the forest, and will use the annual growth increments to project that forest backward in time. Tree biomass is estimated using empirical allometric equations relating DBH to aboveground biomass. There are a number of sources of uncertainty in this approach, and before moving you are encouraged to think about and write down a few: Today we will use a statistical model based on the model developed by Clark et al 2007 that partitions out a number of sources of variability and uncertainty in tree ring and plot data (Fig 1). This model is a Bayesian statespace model that treats the true diameters (D) and increments (X) as latent variables that are connected through a fairly simple mixed effects process model \\[D_{ij,t+1} = D_{ij,t} + \\mu + \\alpha_{i} + \\alpha_t + \\epsilon_{ij,t}\\] where i = individual, j = plot, t = time (year). Each of these terms are represented at normal distributions, where \\(\\mu\\) is a fixed effect (overall mean growth rate) and individual and year are random effects \\[\\mu \\sim N(0.5,0.5)\\] \\[\\alpha_{i} \\sim N(0,\\tau_{i})\\] \\[\\alpha_{t} \\sim N(0,\\tau_{t})\\] \\[\\epsilon_{ij,t} \\sim N(0,\\tau_{e})\\] The connection between the true (latent) variable and the observations is also represented as normal with these variances representing measurement error: \\[D_{ij,t}^O \\sim N( D_{ij,t},\\tau_D)\\] \\[X_{ij,t}^O \\sim N( X_{ij,t},\\tau_r)\\] Finally, there are five gamma priors on the precisions, one for the residual process error (\\(\\tau_{e}\\)), two for the random effects on individual (\\(\\tau_{i}\\)) and time (\\(\\tau_t\\)), and two measurement errors or DBH (\\(\\tau_D\\)) and tree rings (\\(\\tau_r\\)) \\[\\tau_{e} \\sim Gamma(a_e,r_e)\\] \\[\\tau_{i} \\sim Gamma(a_i,r_i)\\] \\[\\tau_{t} \\sim Gamma(a_t,r_t)\\] \\[\\tau_{D} \\sim Gamma(a_D,r_D)\\] \\[\\tau_{r} \\sim Gamma(a_r,r_r)\\] This model is encapsulated in the PEcAn function: InventoryGrowthFusion(combined,n,iter) where the first argument is the combined data set formatted for JAGS and the second is the number of MCMC interations. The model itself is written for JAGS and is embedded in the function. Running the above InventoryGrowthFusion will run a full MCMC algorithm, so it does take a while to run. The code returns the results as an mcmc.list object, and the next line in the script saves this to the outputs directory We then call the function InventoryGrowthFusionDiagnostics to print out a set of MCMC diagnostics and example time-series for growth and DBH. 5.5.4.7 Allometric equations Aboveground NPP is estimated as the increment in annual total aboveground biomass. This estimate is imperfect, but not unreasonable for demonstration purposes. As mentioned above, we will take an allometric approach of scaling from diameter to biomass: Biomass = b0 * DBH b1 We will generate the allometric equation on a PFT level using another Bayesian model that synthesizes across a database of species-level allometric equations (Jenkins et al 2004). This model has two steps within the overall MCMC loop. First it simulates data from each equation, including both parameter and residual uncertainties, and then it updated the parameters of a single allometric relationship across all observations. The code also fits a second model, which includes a random site effect, but for simplicity we will not be using output from this version. Prior to running the model we have to first query the species codes for our pfts. Next we pass this PFT list to the model, AllomAve, which saves the results to the output directory in addition to returning a summary of the parameters and covariances. 5.5.4.8 Estimate stand-level NPP If we have no uncertainty in our data or allometric equations, we could estimate the stand aboveground biomass (AGB) for every year by summing over the biomass of all the trees in the plot and then divide by the plot area. We would then estimate NPP by the difference in AGB between years. One approach to propagating uncertainties into NPP would be to transform the distribution of DBH for each individual tree and year into a distribution for biomass, then sum over those distributions to get a distribution for AGB and then subtract the distributions to get the distributions of NPP. However, if we do this we will greatly overestimate the uncertainty in NPP because we ignore the fact that our increment data has much lower uncertainty than our diameter data. In essence, if we take a random draw from a distribution of AGB in year and it comes back above average, the AGB is much more likely to also be above average the following year than if we were to do an independent draw from that distribution. Accounting for this covariance requires a fairly simple change in our approach and takes advantage of the nature of the MCMC output. The basic idea is that we are going to take a random draw from the full individual x year diameter matrix, as well as a random draw of allometric parameters, and perform the ‘zero-error’ calculation approach described above. We will then create a distribution of all the NPP estimates that comes out of repeated draws for the full diameter matrix. This approach is encapsulated in the function plot2AGB. The argument unit.conv is a factor that combines both the area of the plot and the unit conversion from tree biomass (kg/tree) to stand AGB (Mg/ha). There are two outputs from plot2AGB: a pdf depicting estimated NPP and AGB (mean and 95% CI) time series, with each page being a plot; and plot2AGB.Rdata, a binary record of the function output that is read into the data assimilation code. The latter is also returned from the fuction and assigned to the variable “state”. Finally, we calculate the mean and standard deviation of NPP and save this as obs. 5.5.4.9 Build Initial Conditions The function sample.IC.SIPNET uses the AGB estimate from the previous function in order to initialize the data assimilation routine. Specifically it samples n.ensemble values from the first time step of the AGB estimate. Embedded in this function are also a number of prior distributions for other state variables, which are also samples in order to create a full set of initial conditions for SIPNET. 5.5.4.10 Load Priors The function sample.parameters samples values from the most recent posterior parameter distributions. You can also specify a specific set of parameters so sample from by specifying the argument <prior> within <assim.sequential> as the posterior.id you want to use. This is useful to know if you want to go back and run with the Meta-analysis posteriors, or if you end up rerunning the meta-analysis and need to go back and specify the parameter data assimilation posteriors instead of the most recent. 5.5.4.11 Ensemble Kalman Filter The function sda.enkf will run SIPNET in Ensemble Kalman Filter mode. The output of this function will be all the of run outputs, a PDF of diagnostics, and an Rdata object that includes three lists: FORECAST will be the ensemble forecasts for each year ANALYSIS will be the updated ensemble sample given the NPP observations enkf.params contains the prior and posterior mean vector and covariance matrix for each time step. If you look within this function you will find that much of the format is similar to the pda.mcmc function, but in general is much simpler. The function begins by setting parameters, opening a database connection, and generating workflow and ensemble ID’s. Next we split the SIPNET clim meteorology file up into individual annual files since we will be running SIPNET for a year at a time between updates. Next we perform an initial set of runs starting from the initial states and parameters we described above. In doing so we create the run and output directories, the README file, and the runs.txt file that is read by start_model_runs. Worth noting is that the README and runs.txt don’t need to be updated within the forecast loop. Given this initial run we then enter the forecast loop. Within this loop over years we perform four basic steps. First, we read the output from the latest runs. Second, we calculate the updated posterior state estimates based on the model ensemble prior and observation likelihood. Third, we resample the state estimates based on these posterior parameters. Finally, we start a new set of runs based on this sample. The sda.enfk function then ends by saving the outputs and generating some diagnostic figures. The first set of these shows the data, forecast, analysis. The second set shows pairs plots of the covariance structure for the Forecast and Analysis steps. The final set shows the time-series plots for the Analysis of the over state variables produced by SIPNET. 5.5.4.12 Finishing up The final bit of code in the script will register the workflow as complete in the database. After this is run you should be able to find all of the runs, and all of the outputs generated above, from within the PEcAn webpages. 5.5.5 PEcAn: Testing the Sensitivity Analysis Against Observations” 5.5.5.1 Author: “Ankur Desai” 5.5.5.2 Flux Measurements and Modeling Course, Tutorial Part 2 This tutorial assumes you have successfully completed the Demo01, Demo02 and the modelVSdata tutorial. 5.5.5.3 Introduction Now that you have successfully run PEcAn through the web interface and have learned how to do a simple comparison of flux tower observations to model output, let’s start looking at how data assimilation and parameter estimation would work with an ecosystem model. Before we start a full data assimilation exercise, let’s try something simple – single parameter selection by hand. Open http://localhost:3280/rstudio or http://localhost:6480/rstudio or the Amazon URL/rstudio if running on the cloud. In Demo02, you have ran a sensitivity analysis of SIPNET model runs at Niwot Ridge sampling across quantiles of a parameter prior, while holding all others to the median value. The pecan.xml file told PEcAn to run an sensitivity analysis, which simply meant SIPNET was run multiple times with the same driver, but varying parameter values one at a time (while holding all others to their median), and the parameter range also specified in the pecan.xml file (as quantiles, which were then sampled against the BETY database of observed variation in the parameter for species within the specific plant functional type). Let’s try to compare Ameriflux NEE to SIPNET NEE across all these runs to make a plot of parameter vs. goodness-of-fit. We’ll start with root mean square error (RMSE), but then discuss some other tests, too. 5.5.5.4 A. Read in settings object from a run xml Open up a connection to the bety database and create a settings object from your xml, just like in the modelVSdata tutorial. settings<-PEcAn.settings::read.settings("~/output/PEcAn_99000000002/pecan.CONFIGS.xml") con <- PEcan.DB::db.open(settings$database$bety) We read in the pecan.CONFIG.xml instead of the pecan.xml because the pecan.CONFIG.xml has already incorperated some of the information from the database we would have to query again if we used the pecan.xml. runid<-as.character(read.table(paste(settings$outdir, "/run/","runs.txt", sep=""))[1,1]) # Note: if you are using an xml from a run with multiple ensembles this line will provide only the first run id outdir<- paste(settings$outdir,"/out/",runid,sep= "") start.year<-as.numeric(lubridate::year(settings$run$start.date)) end.year<-as.numeric(lubridate::year(settings$run$end.date)) site.id<-settings$run$site$id Back to the files pane, within the run/ folder, find a folder called pft/ and within that a folder with the pft name (such as temprature.coniferous). Within that is a PDF file that starts sensitivity.analysis. In Rstudio, just click on the PDF to view it. You discussed this PDF last tutorial, through the web interface. Here, we see how the model NEE in SIPNET changes with each parameter. Let’s read that sensitivity output. Navigate back up (..) to the ~/output/RUNDIR/ folder. Find a series of files that end in “.RData”. These files contain the R variables used to make these plots. In particular, there is sensitivity.output..RData which contains the annual NEE as a function of each parameter quantile. Click on it to load a variable into your environment. There is sensitivity.results..RData which contains plotting functions and variance decomposition output, which we don’t need in this tutorial. And finally, there is **sensitivity.samples.*.RData** which contains the actual parameter values and the RunIDs associated with each sensitivity run. Click on sensitivity.samples..RData* to load it into your environment, or run the {r}load() script below. You should see a set of five new variables (pft.names, trait.names, sa.ensemble.id, sa.run.ids, sa.samples). Let’s extract a parameter and it’s sensitivity NEE output from the list sa.samples, which is organized by PFT, and then by parameter. First, let’s look at a list of PFTs and parameters available: load(paste(settings$outdir,"/sensitivity.samples.", settings$sensitivity.analysis$ensemble.id,".Rdata", sep="")) names(sa.samples) names(sa.samples$temperate.coniferous) Now to see the actual parameter values used by the runs, just pick a parameter and type: sa.samples$temperate.coniferous$psnTOpt Let’s store that value for future use: psnTOpt <- sa.samples$temperate.coniferous$psnTOpt Now, to see the annual NEE output from the model for a particular PFT and parameter range, try load(paste(settings$outdir,paste("/sensitivity.output", settings$sensitivity.analysis$ensemble.id,settings$sensitivity.analysis$variable,start.year,end.year,"Rdata", sep="."), sep="")) sensitivity.output$temperate.coniferous$psnTOpt You could even plot the two: plot(psnTOpt,sensitivity.output$temperate.coniferous$psnTOpt) What do you notice? Let’s try to read the output from a single run id as you did in the earlier tutorial. runids <- sa.run.ids$temperate.coniferous$psnTOpt arun <- PEcAn.utils::read.output(runids[1], paste(settings$outdir, "out", runids[1], sep="/"), start.year= start.year, end.year= end.year,"NEE", dataframe = TRUE) plot(arun$posix,arun$NEE) 5.5.5.5 B. Now let’s bring in the actual observations Recall reading Ameriflux NEE in the modelVSdata tutorial. File_path<-"~/output/dbfiles/AmerifluxLBL_site_0-772/AMF_US-NR1_BASE_HH_9-1.csv" File_format<-PEcAn.DB::query.format.vars(bety = con, format.id = 5000000002) #This matches the file with a premade "format" or a template that describes how the information in the file is organized site<-PEcAn.DB::query.site(site.id = site.id, con) obs<-PEcAn.benchmark::load_data(data.path = File_path, format= File_format, time.row = File_format$time.row, site = site, start_year = start.year, end_year = end.year) obs$NEE[obs$UST<0.2]<-NA #Apply a U* filter plottable<-align_data(model.calc = arun, obvs.calc = obs, align_method = "match_timestep", var= "NEE") head(plottable) 5.5.5.6 C. Finally, we can finally compare model to data In the modelVSdata, you also compared NEE to the ensemble model run. Here we will do the same except we include each sensitivity run. plot(plottable$NEE.m,plottable$NEE.o) abline(0,1,col="red") And remember the formula for RMSE: sqrt(mean((plottable$NEE.o-plottable$NEE.m)^2,na.rm = TRUE)) All we need to do to go beyond this is to make a loop that reads in each sensitivity run NEE based on runids, calculates RMSE against the observations, and stores it in an array, by combining the steps above in a for loop. Make sure you change the directory names and year to your specific run. rmses <- rep(0,length(runids)) for(r in 1:length(runids)){ arun <- read.output(runids[r],paste(settings$outdir, "out", runids[r], sep="/"),2004,2004,"NEE", dataframe= TRUE) plottable<-align_data(model.calc = arun, obvs.calc = obs, align_method = "match_timestep", var= "NEE") rmses[r] <- sqrt(mean((plottable$NEE.o-plottable$NEE.m)^2,na.rm = TRUE)) } rmses Let’s plot that array plot(psnTOpt,rmses) Can you identify a minimum (if there is one)? If so, is there any reason to believe this is the “best” parameter? Why or why not? Think about all the other parameters. Now that you have the hang of it, here are a few more things to try: Try a different error functions, given actual NEE uncertainty. You learned earlier that uncertainty in half-hourly observed NEE is not Gaussian. This makes RMSE not the correct measure for goodness-of-fit. Go to ~/pecan/modules/uncertainty/R, open flux_uncertainty.R, and click on the source button in the program editing pane. Alternatively, you can source the function from the console using: source("pecan/modules/uncertainty/R/flux_uncertainty.R") Then you can run: unc <- flux.uncertainty(plottable$NEE.o,QC=rep(0,17520)) plot_flux_uncertainty(unc) The figure shows you uncertainty (err) as a function of NEE magnitude (mag). How might you use this information to change the RMSE calculation? Try a few other parameters. Repeat the above steps but with a different parameter. You might want to select one from the sensitivity PDF that has a large sensitivity or from the variance decomposition that is also poorly constrained. "],["advanced-user.html", "5.6 Advanced User Guide", " 5.6 Advanced User Guide Workflow curl submission 5.6.1 Submitting Workflow from Command Line This is how you can submit a workflow from the command line through the pecan web interface. This will use curl to submit all the requireed parameters to the web interface and trigger a run. # the host where the model should run # never use remote sites since you will need to pass your username/password and that WILL be stored hostname=pecan.vm # the site id where to run the model (NIWOT in this case) siteid=772 # start date and end date, / need to be replaced with %2F or use - (NOT TESTED) start=2004-01-01 end=2004-12-31 # if of model you want to run, rest of section parameters depend on the model selected (SIPNET 136) modelid=5000000002 # PFT selected (we should just use a number here) # NOTE: the square brackets are needed and will need be escaped with a \\ if you call this from command line pft[]=temperate.coniferous # initial pool condition (-1 means nothing is selected) input_poolinitcond=-1 # met data input_met=99000000006 # variables to collect variables=NPP,GPP # ensemble size runs=10 # use sensitivity analysis sensitivity=-1,1 # redirect to the edit pecan.xml file pecan_edit=on # redirect to edit the model configuration files model_edit=on # use browndog browndog=on For example the following will run the above workflow. Using -v in curl will show verbose output (needed) and the grep will make sure it only shows the redirect. This will show the actual workflowid: curl -s -v 'http://localhost:6480/pecan/04-runpecan.php?hostname=pecan.vm&siteid=772&start=2004-01-01&end=2004-12-31&modelid=5000000002&pft\\[\\]=temperate.coniferous&input_poolinitcond=-1&input_met=99000000006' 2>&1 | grep 'Location:' < Location: 05-running.php?workflowid=99000000004 In this case you can use the browser to see progress, or use the following to see the status: curl -s 'http://localhost:6480/pecan/dataset.php?workflowid=99000000004&type=file&name=STATUS' TRAIT 2017-12-13 08:56:56 2017-12-13 08:56:57 DONE META 2017-12-13 08:56:57 2017-12-13 08:57:13 DONE CONFIG 2017-12-13 08:57:13 2017-12-13 08:57:14 DONE MODEL 2017-12-13 08:57:14 2017-12-13 08:57:15 DONE OUTPUT 2017-12-13 08:57:15 2017-12-13 08:57:15 DONE ENSEMBLE 2017-12-13 08:57:15 2017-12-13 08:57:16 DONE FINISHED 2017-12-13 08:57:16 2017-12-13 08:57:16 DONE Or to show the output log: curl -s 'http://localhost:6480/pecan/dataset.php?workflowid=99000000004&type=file&name=workflow.Rout' R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" Copyright (C) 2017 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. .... "],["basic-web-workflow.html", "6 Basic Web workflow", " 6 Basic Web workflow This chapter describes the major steps of the PEcAn web-based workflow, which are as follows: Model and site selection Model configuration Run execution – TODO! Results – TODO! Interactive visualizations – TODO! We recommend that all new users begin with [PEcAn Hands-On Demo 01: Basic Run]. The documentation below assumes you are already familiar with how to navigate to PEcAn’s interactive web interface for running models. "],["web-site-model.html", "6.1 Site and model selection", " 6.1 Site and model selection This page is used to select the model to run and the site at which you would like to run that model. NOTE: If this page does not load for you, it may be related to a known Google Maps API key issue. See issue #1269 for a possible solution. 6.1.1 Selecting a model On the Select Host webpage use the Host pull-down menu to select the server you want to run on. PEcAn is designed to allow models to be run both locally and on remote high-performance computing (HPC) resources (i.e. clusters). We recommend that users start with local runs. More information about connecting your PEcAn instance to a cluster can be found on the Remote execution with PEcAn page. Next, select the model you want to run under the Model pull-down menu. The list of models currently supported by PEcAn, along with information about these models, is available on the PEcAn Models page. If a PEcAn-supported model is not listed, this is most likely because the model has not been installed on the server. The PEcAn team does not have permissions to redistribute all of the models that are coupled to it, so you will have to install some PEcAn-compatible models yourself. Please consult the PEcAn model listing for information about obtaining and installing different models. Once the model is installed and you have added the location of the model executable to Bety (see Adding An Ecosystem Model), your model should appear on the PEcAn Select Host page after your refresh the page. If you would like to add a new model to PEcAn please consult our guide for Adding an Ecosystem Model and contact the PEcAn team for assistance. If selecting your model causes your site to disappear from the Google Map, that means the site exists but there are no drivers for that model-site combination registered in the database. Click the “Conversion” checkbox. If your site reappears, that means PEcAn should be able to automatically generate the required inputs for this site by converting from existing input files in other formats. If the site still does not reappear, that means there are required input files for that model-site combination that PEcAn cannot autogenerate. This may be because the model has unique input requirements or because it has not yet been fully coupled to the PEcAn input processing workflow. Go to the troubleshooting section under Selecting a site for more information on diagnosing what drivers are missing. 6.1.2 Selecting a site 6.1.3 Site Groups PEcAn provides the option of organizing sites into groups to make them easier to find and easier to run as a group. We have pre-loaded a number of common research networks (e.g., FLUXNET, LTER, NEON), but you are free to create new site groups through Bety. If you are searching for a site that is not part of an existing site group, or you are unsure which site group it belongs to, select “All Sites” to see all sites in Bety. Note that this may take a while to render. 6.1.4 Using existing sites Find the site on the map The simplest way of determining if a site exists in PEcAn is through the Google Map interface of the web-based workflow. You’ll want to make sure that the “Site Group” is set to “All Sites” and the “Model” is set to “All Models”. Find the site in BETY If the site is not on the map, it may still be in Bety but with insufficient geographic information. To locate the site in Bety, first login to your local version of the BETY database. If using the VM, navigate to localhost:6480/bety and login with username bety and password illinois. Then, navigate to Data > Sites and use the “Search” box to search for your site. If you do find your site, click “Edit” and add geographic information so that the site will show up on the map. Also, note that the site ID number shows up in the URL for the “Show” or “Edit” pages. This ID is often useful to know, for example when editing a PEcAn settings file by hand. If you did not find you site, follow the instructions below to add a site. 6.1.5 Adding a new site (TODO: Move most of this out) Log into Bety as described above. Pick a citation for your site Each site requires an associated “citation” that must be added before the site itself is added. First, navigate to “Data > Citations” and use the “Search” box to see if the relevant citation already exists. If it does, click the check mark under “Actions” to proceed to site creation. To create a new citation, click the New Citation button, fill in the fields, and then click “Create”. The “field URL” should contain the web address that takes you to this publication on the publisher’s website. The “PDF” field should be the full web address to a PDF for this citation. Note that our definition of a citation is flexible, and a citation need not be a peer-reviewed publication. Most of the fields in “New Citation” can be left blank, but we recommend at least adding a descriptive title, such as “EBI Farm Field Data” and a relevant contact person as the “Author”. Once the Citation is created or selected this should automatically take you to the Sites page and list any Sites already associated with this citation. To create a new site click the New Site button. When creating a new site, the most important fields are the Site name and coordinates (latitude and longitude). The coordinates can be entered by hand or by clicking on the site location on the Google Map interface. All other information is optional, but can be useful for searching and indexing purposes. When you are done click Create. At this point, once the PEcAn site-level page is refreshed, the site should automatically appear. 6.1.6 Troubleshooting 6.1.6.1 My site shows up when I don’t have any model selected, but disappears once I select the model I want to run Selecting a model will cause PEcAn to filter the available sites based on whether they possess the required Inputs for a given model (e.g. meteorology). To check what Inputs are missing for a site point your browser to the pecan/checksite.php webpage (e.g. localhost:6480/pecan/checksite.php). This page looks virtually identical to the site selection page, except that it has a Check button instead of Prev and Next. If you select a Machine, Model, and Site and then click Check the page should return a list of what Inputs are missing (listing both the name and the Format ID number). Don’t forget that its possible for PEcAn to have required Inputs in its database, but just not have them for the Machine where you want to run. To see more about what Inputs a given model can accept, and which of those are required, take a look at the MODEL_TYPE table entry in the database (e.g. go to localhost:6480/bety; Select Runs > Model Type; and then click on the model you want to run). For information about loading missing Inputs into the database visit Input records in BETY, and also read the rest of the pages under this section, which will provide important information about the specific classes of Inputs (e.g. meteorology, vegetation, etc). Finally, we are continually developing and refining workflows and standards for processing Input data in a model-agnostic way. The details about what Inputs can be processed automatically are discussed input-by-input in the sections below. For those looking to dive into the code or troubleshoot further, these conversions are ultimately handled under the PEcAn.workflow::do_conversions workflow module. "],["web-model-config.html", "6.2 Model configuration", " 6.2 Model configuration This page is used for basic model configuration, including when your model will run and what input data it will use. 6.2.1 Choosing meteorology Once a Machine, Model, and Site have been selected, PEcAn will take you to the Input selection page. From this page you will select what Plant Functional Type (PFT) you want to run at a site, the start and end dates of the run, and various Input selections. The most common of these across all models is the need to specify meteorological forcing data. The exact name of the menu item for meteorology will vary by model because all of the Input requirements are generated individually for each model based on the MODEL_TYPE table. In general there are 3 possible cases for meteorology PEcAn already has driver files in its database PEcAn does not have drivers, but can generate them from publicly available data You need (or want) to upload your own drivers The first two cases will appear automatically in the the pull down menu. For meteorological files that already exist you will see the date range that’s available. By contrast, met that can be generated will appear as “Use ”, where is the origin of the data (e.g. “Use Ameriflux” will use the micromet from an Ameriflux eddy covariance tower, if one is present at the site). If you want to upload your own met data this can be done in three ways. The default way to add met data is to incorporate it into the overall meteorological processing workflow. This is preferred if you are working with a common meteorological data product that is not yet in PEcAn’s workflow. This case can be divided into two special cases: Data is in a common MIME-type that PEcAn already has a converter for (e.g. CSV). In this case you’ll want to create a new Format record for the meta-data so that the existing converter can process this data. See documentation for [Creating a new Format record in BETY] for more details. Data is in a more complicated format or interactive database, but large/useful enough to warrent a custom conversion function. Details on creating custom met conversions is in the [Input Conversion], though at this stage you would also be strongly encouraged to contact the PEcAn development team. The second-best way is to upload data in PEcAn’s standard meteorological format (netCDF files, CF metadata). See [Input Conversion] for details about variables and units. From this standard, PEcAn can then convert the file to the model-specific format required by the model you have chosen. This approach is preferred for a rare or one-off meterological file format, because PEcAn will also be able to convert the file into the format required by any other model as well. The last option for adding met data is to add it in a model-specific format, which is often easiest if you’ve already been running your model at a site and are just switching to using PEcAn. 6.2.2 Met workflow In a nutshell, the PEcAn met workflow is designed to reduce the problem of converting n possible met inputs into m possible model formats, which requires n x m conversion functions as well as numerous custom functions for downscaling, gap filling, etc. Instead, PEcAn works with a single met standard, and thus requires n conversion functions, one for converting each data source into the PEcAn standard, and then m conversion functions for converting from that standard to what an individual model requires. For a new model joining the PEcAn system the burden in particularly low – writing one conversion function provides access to n inputs. Similarly, PEcAn performs all other operations/manipulations (extracting a site, downscaling, gap filling, etc) within the PEcAn standard, which means these operations only need be implemented once. Consider a generic met data product named MET for simplicity. PEcAn will use a function, download.MET, to pull data for the selected year from a public data source (e.g. Ameriflux, North American Regional Reanalysis, etc). Next, PEcAn will use a function, met2CF.MET, to convert the data into the PEcAn standard. If the data is already at the site scale it will then gapfill the data. If the data is a regional or global data product, PEcAn will then permute the data to allow easier site-level extraction, then it will extract data for the requested site and data range. Modules to address the temporal and spatial downscaling of meteorological data products, as well as their uncertainties, are in development but not yet part of the operational workflow. All of these functions are located within the data.atmosphere module. Once data is in the standard format and processed, it will be converted to the model-specific format using a met2model.MODEL function (located in that MODEL’s module). More detailed information on how PEcAn processes inputs can be found on our [Input Conversion] page. 6.2.3 Troubleshooting meteorological conversions At the current moment, most of the issues below address possible errors that the Ameriflux meteorology workflow might report 6.2.3.1 Could not do gapfill … The following variables have NA’s This error message means that there were gaps in the downloaded data, for whatever variables that were listed, which were larger than the current algorithm could fill. Particularly common is missing radiation or PAR data, as Ameriflux frequently converts nighttime data to NULL, and work is in progress to detect this based on solar geometry. Also common are incomplete years (first or last year of tower operations). 6.2.3.2 Could not get information about . Is this an Ameriflux site? This message occurs when PEcAn believes that a site is part of Ameriflux (because it was listed on the Ameriflux or FLUXNET webpage and has a US-* site code), but no data is present on the Ameriflux server. The most common reasons for this is that you have selected a site that has not submitted data to Ameriflux yet (or that data hasn’t been processed yet), or you have selected a year that’s outside the tower’s operational period. Visit Ameriflux and FLUXNET for lists of available site years. 6.2.3.3 Could not download data for for the year This is similar to the previous error, but in this case PEcAn did find data for the site listed, but just not for the year requested. This can usually be fixed by just altering the years of the run to match those with available data. 6.2.3.4 I could not find the requested var (or dimvar) in the file! PEcAn could not find a required variable within the downloaded file. Most likely this is due to that variable not being measured at this site. The most common cause of failure is the absence of atmospheric pressure data (PRESS), but since most models have a low sensitivity to this variable we are working on methods to estimate this from other sources. "],["selecting-plant-functional-types-pfts-and-other-parameter-groupings..html", "6.3 Selecting Plant Functional Types (PFTs) and other parameter groupings.", " 6.3 Selecting Plant Functional Types (PFTs) and other parameter groupings. 6.3.1 Using existing PFTs PEcAn does not automatically know what vegetation types are present at your study site so you need to select the PFT. Some models, such as ED2 and LINKAGES, support competition among multiple PFTs and thus you are encouraged to highlight multiple choices. Other models, such as SIPNET and DALEC, only support one PFT at a site. Many models also have parameters that control non-vegetation processes (e.g. soil biogeochemistry and hydrology). PEcAn allows users to assign these parameters to functional groups as well (e.g. a soils PFT) 6.3.2 Creating new PFTs To modify or add a new Plant Functional Type (PFT), or to change a PFT’s priors, navigate on the grey menu bar to Data > PFTs To add a new pft, click “new PFT” at the top and enter a name and description. (hint: we’re trying to name PFTs based on model.biome.pft, ED2 is the default model if one isn’t specified) To add new species to a PFT click on [+] View Related Species and type the species, genus, or family you are looking for into the Search box. Click on the + to add. To remove a species from a PFT, click on [+] View Related Species and click on the X of the species you want to remove from the PFT. To remove a prior, click [-] View Related Prior and click on the X of the variable who’s prior you want to remove. This will cause the parameter to be excluded from all analyses (meta-analysis, sensitivity analysis, etc) and revert to its default value. To add a prior, choose one from the white box of priors on the right to choose. To view the specification of a prior, or to add a new prior, click BETY-DB > Priors and enter the information on the variable, distribution name, distribution parameters, etc. N is the sample size underlying the prior specification (0 is ok for uninformative priors). You can also got to Data > Variables in order to use the search function to find an existing variable (or create a new one). Please try not to create new variables unnecessarily (e.g. changes of variable name or units to what your model uses is handled internally, so you want to find the trait with the correct MEANING). Additional information on adding PFTs, Species, and Priors can be found in Adding An [Ecosystem Model]. 6.3.3 Choosing initial vegetation On the Input Selection webpage, in addition to selecting PFTs, start & end dates, and meteorology, many models also require some way of specifying the initial conditions for the vegetation, which may range from setting the aboveground biomass and LAI up to detailed inventory-like data on species composition and stand structure. At the moment, PEcAn has three cases for initial conditions: If files already exist in the database, they can simply be selected from the menu. For ED2, there are 3 different veg files (site, pss, css) and it is important that you select a complete set, not mix and match. If files don’t exist they can be uploaded following the instructions in Create a database file record for the input data. Automated vegetation initial condition workflow As with meteorology, PEcAn is working to develop a model-agnostic workflow for converting various sources of vegetation data to common standards, developing common processing tools, and then writing out to model-specific formats. This process is in a much early stage than the meteorology workflow, as we are still researching what the options are for standard formats, but ultimately aims to be much more broad in scope, considering not just plot inventory data but also historical documentation, paleoecological proxies, satellite remote sensing (e.g. LANDSAT), airborne hyperspectral imagery, and active remote sensing (Lidar, Radar). At the moment, what is functional is a prototype workflow that works for inventory-based vegetation data. This data can come from either files that have been registered with the BETY Inputs and Formats tables or can be queried from the USFS Forest Inventory and Analysis (FIA). For more information visit Section 13.1.2.2 Vegetation Data 6.3.4 US FIA This tool works with an internal copy of the FIA that is uploaded to a postGRES database along side BETY, however for space reasons this database does not ship with the PEcAn VM. To turn this feature on: Download and Install the FIA database. Instructions in Installing data for PEcAn For web-base runs, specify the database settings in the config.php For R-based runs, specify the database settings in the THE PEcAn XML More detailed information on how PEcAn processes inputs can be found on our [Input Conversion]page. 6.3.5 Spin up A number of ecosystem models are typically initialized by spinning up to steady state. At the moment PEcAn doesn’t handle spin up automatically (e.g. looping met, checking for stability), but there are various ways to achieve a spin-up within the system. Option 1: If there are model-specific settings in a model’s settings/config file, then that file can be accessed by clicking on the Edit model config check box. If this box is selected then PEcAn will pause the site run workflow after it has generated your model config file, but before it runs the model, and give you an opportunity to edit the file by hand, allowing you to change any model-specific spin up settings (e.g met recycling, spin up length) Option 2: Set start_year very early and set the met drivers to be a long time series (e.g. PalEON, something custom uploaded to Inputs) Option 3: In the MODEL_TYPE table, add your model’s restart format as an optional input, modify the model specific write.config function to use that restart, and then load a previous spin-up to the Inputs table Beyond these options, we hope to eventually develop more general, model-agnostic tools for spin up. In particular, we have started to explore the accelerated spin-up and semi-analytical techniques being developed by Yiqi Luo’s lab 6.3.6 Selecting a soils product Many models have requirements for soils information, which may include: site-specific soil texture and depth information; soil biogeochemical initial conditions (e.g. soil carbon and nitrogen pools); soil moisture initial conditions; and soil thermal initial conditions. As with Choosing initial vegetation, we eventually hope to develop data standards, soils workflows, and spin-up tools, but at the moment this workflow is in the early stages of development. Model requirements need to be met byCreating a new Input record in BETY into the database or using files that have already been uploaded. Similar to met, we recommend that this file be in the PEcAn-standard netCDF described below, but model-specific files can also be registered. 6.3.7 Soil texture, depth, and physical parameters A PEcAn-standard netCDF file format exists for soil texture, depth, and physical parameters, using PEcAn standard names that are largely a direct extention of the CF standard. The easiest way to create this file is with the PEcAn R function soil2netcdf as described in the Soil Data section of the Advanced Users Guide. A table of standard names and units can be listed using PEcAn.data.land::soil.units() with no arguments. More detailed information on how PEcAn processes inputs can be found on our [Input Conversion] page. 6.3.8 Other model inputs Finally, any other model-specific inputs (e.g. N deposition, land use history, etc), should be met by Creating a new Input record in BETY or using files that have already been uploaded. "],["intermediate-user.html", "7 More on the PEcAn Web Interface", " 7 More on the PEcAn Web Interface This section will provide information to those wanting to take advantage of PEcAn’s customizations from the web interface. Additional web configuration - Advanced options available from the web interface Brown Dog Sensitivity and ensemble analyses[TODO: Under construction…] [Editing model configurations][TODO: Under construction…] Settings-configured analyses - Analyses only available by manually editing pecan.xml Parameter data assimilation (PDA) State data assimilation (SDA) Remote execution with PEcAn - Running analyses and generally working with external machines (HPC) in the context of PEcAn. "],["additional-web-configuration.html", "7.1 Additional web configuration", " 7.1 Additional web configuration Additional settings for web configuration: Web interface setup Brown Dog Advanced setup [Sensitivity analysis] (TODO) [Uncertainty analysis] (TODO) Editing model configuration files 7.1.1 Web interface setup There are few options which you can change via web interface. To visit the configuration page either you can just click on the setups link on the introduction page alternatively can type <host>/setups/. The list of configuration available Database configuration : BETYdb(Biofuel Ecophysiological Traits and Yields database) configuration details, can be edited according to need. Browndog configuration : Browndog configuration details, Used to connect browndog. Its included by default in VM. FIA Database : FIA(Forest Inventory and Analysis) Database configuration details, Can be used to add additional data to models. Google MapKey : Google Map key, used to access the google map by PEcAn. Change Password : A small infomation to change the VM user password. (if using Docker image it won’t work) Automatic Sync : If ON then it will sync the database between local machine and the remote servers. Still unders testing part might be buggy. Still work on the adding other editing feature going on, this page will be updated as new configuration will be available. 7.1.2 Brown Dog The Browndog service provides PEcAn with access to large and diverse sets of data at the click of a button in the format that PEcAn needs. By clicking the checkbox you will be using the Browndog Service to process data. For more information regarding meteorological data check out Available Meteorological Drivers. More information can be found at the Browndog website. 7.1.3 Advanced Setup (TODO: Under construction…) 7.1.4 Editing model configurations (TODO: Under construction…) "],["settings-configured-analyses.html", "7.2 Settings-configured analyses", " 7.2 Settings-configured analyses These analyses can be run through the web interface, but lack graphical interfaces and currently can only be configured throughthe XML settings. To run these analyses use the Edit pecan.xml checkbox on the Input configuration page. Eventually, these modules will be integrated into the web user interface. Parameter Data Assimilation (PDA) State Data Assimilation (SDA) MultiSettings Benchmarking (TODO: Add links) 7.2.1 Parameter data assimilation (PDA) All functions pertaining to Parameter Data Assimilation are housed within: pecan/modules/assim.batch. For a detailed usage of the module, please see the vignette under pecan/modules/assim.batch/vignettes. Hierarchical version of the PDA is also implemented, for more details, see the MultiSitePDAVignette package vignette and function-level documentation. 7.2.1.1 pda.mcmc.R This is the main PDA code. It performs Bayesian MCMC on model parameters by proposing parameter values, running the model, calculating a likelihood (between model output and supplied observations), and accepting or rejecting the proposed parameters (Metropolis algorithm). Additional notes: The first argument is settings, followed by others that all default to NULL.settings is a list used throughout Pecan, which contains all the user options for whatever analyses are being done. The easiest thing to do is just pass that whole object all around the Pecan code and let different functions access whichever settings they need. That’s what a lot of the rest of the Pecan code does. But the flexibility to override most of the relevant settings in settings is there by providing them directly as arguments to the function. The if(FALSE)… : If you’re trying to step through the function you probably will have the settings object around, but those other variables will be undefined. If you set them all to NULL then they’ll be ignored without causing errors. It is there for debugging purposes. The next step calls pda.settings(), which is in the file pda.utils.R (see below). It checks whether any settings are being overridden by arguments, and in most cases supplies default values if it can’t find either. In the MCMC setup section The code is set up to allow you to start a new MCMC chain, or to continue a previous chain as specified in settings. The code writes a simple text file of parameter samples at every iteration, which lets you get some results and even re-start an MCMC that fails for some reason. The code has adaptive jump distributions. So you can see some initialization of the jump distributions and associated variables here. Finally, note that after all this setup a new XML settings file is saved. The idea is that the original pecan.xml you create is preserved for provenance, and then periodically throughout the workflow the settings (likely containing new information) are re-saved with descriptive filenames. MCMC loop Periodically adjust jump distribution to make acceptance rate closer to target Propose new parameters one at a time. For each: First, note that Pecan may be handling many more parameters than are actually being targeted by PDA. Pecan puts priors on any variables it has information for (in the BETY database), and then these get passed around throughout the analysis and every step (meta-, sensitivity, ensemble analyses, etc.). But for PDA, you specify a separate list of probably far fewer parameters to constrain with data. These are the ones that get looped over and varied here. The distinction between all parameters and only those dealt with in PDA is dealt with in the setup code above. First a new value is proposed for the parameter of interest. Then, a new model run is set up, identical to the previous except with the new proposed value for the one parameter being updated on this run. The model run is started, and outputs collected after waiting for it to finish. A new likelihood is calculated based on the model outputs and the observed dataset provided. Standard Metropolis acceptance criteria is used to decide whether to keep the proposed parameter. Periodically (at interval specified in settings), a diagnostic figure is saved to disk so you can check on progress. This works only for NEE currently 7.2.1.2 pda.mcmc.bs.R This file is basically identical to pda.mcm.R, but rather than propose parameters one at a time, it proposes new values for all parameters at once (“bs” stands for “block sampling”). You choose which option to use by specifying settings\\(assim.batch\\)method: * “bruteforce” means sample parameters one at a time * “bruteforce.bs” means use this version, sampling all parameters at once * “emulator” means use the emulated-likelihood version 7.2.1.3 pda.emulator This version of the PDA code again looks quite similar to the basic “bruteforce” one, but its mechanics are very different. The basic idea is, rather than running thousands of model iterations to explore parameter space via MCMC, run a relatively smaller number of runs that have been carefully chosen to give good coverage of parameter space. Then, basically interpolate the likelihood calculated for each of those runs (actually, fit a Gaussian process to it), to get a surface that “emulates” the true likelihood. Now, perform regular MCMC (just like the “bruteforce” approach), except instead of actually running the model on every iteration to get a likelihood, just get an approximation from the likelihood emulator. Since the latter step takes virtually no time, you can run as long of an MCMC as you need at little computational cost, once you have done the initial model runs to create the likelihood emulator. 7.2.1.4 pda.mcmc.recover.R This function is for recovering a failed PDA MCMC run. 7.2.1.5 pda.utils.R This file contains most of the individual functions used by the main PDA functions (pda.mcmc.*.R). assim.batch is the main function Pecan calls to do PDA. It checks which method is requested (bruteforce, bruteforce.bs, or emulator) and call the appropriate function described above. pda.setting handles settings. If a setting isn’t found, the code can usually supply a reasonable default. pda.load.priors is fairly self explanatory, except that it handles a lot of cases and gives different options priority over others. Basically, the priors to use for PDA parameters can come from either a Pecan prior.distns or post.distns object (the latter would be, e.g., the posteriors of a meta-analysis or previous PDA), or specified either by file path or BETY ID. If not told otherwise, the code tries to just find the most recent posterior in BETY, and use that as prior for PDA. pda.create.ensemble gets an ensemble ID for the PDA. All model runs associated with an individual PDA (any of the three methods) are considered part of a single ensemble. This function does is register a new ensemble in BETY, and return the ID that BETY gives it. pda.define.prior.fn creates R functions for all of the priors the PDA will use. pda.init.params sets up the parameter matrix for the run, which has one row per iteration, and one column per parameter. Columns include all Pecan parameters, not just the (probably small) subset that are being updated by PDA. This is for compatibility with other Pecan components. If starting a fresh run, the returned matrix is just a big empty matrix to fill in as the PDA runs. If continuing an existing MCMC, then it will be the previous params matrix, with a bunch of blank rows added on for filling in during this round of PDA. pda.init.run This is basically a big wrapper for Pecan’s write.config function (actually functions [plural], since every model in Pecan has its own version). For the bruteforce and bruteforce.bs methods this will be run once per iteration, whereas the emulator method knows about all its runs ahead of time and this will be a big batch of all runs at once. pda.adjust.jumps tweaks the jump distributions for the standard MCMC method, and pda.adjust.jumps.bs does the same for the block-sampled version. pda.calc.llik calculates the log-likelihood of the model given all datasets provided to compare it to. pda.generate.knots is for the emulator version of PDA. It uses a Latin hypercube design to sample a specified number of locations in parameter space. These locations are where the model will actually be run, and then the GP interpolates the likelihood surface in between. pda.plot.params provides basic MCMC diagnostics (trace and density) for parameters being sampled. pda.postprocess prepares the posteriors of the PDA, stores them to files and the database, and performs some other cleanup functions. pda.load.data.r This is the function that loads in data that will be used to constrain the PDA. It’s supposed to be eventually more integrated with Pecan, which will know how to load all kinds of data from all kinds of sources. For now, it can do NEE from Ameriflux. pda.define.llik.r A simple helper function that defines likelihood functions for different datasets. Probably in the future this should be queried from the database or something. For now, it is extremely limited. The original test case of NEE assimilation uses a heteroskedastic Laplacian distribution. pda.get.model.output.R Another function that will eventually grow to handle many more cases, or perhaps be replaced by a better system altogether. For now though, it again just handles Ameriflux NEE. 7.2.1.6 get.da.data.*.R, plot.da.R Old codes written by Carl Davidson. Defunct now, but may contain good ideas so currently left in. 7.2.2 State data assimilation (SDA) sda.enkf.R is housed within: /pecan/modules/assim.sequential/R The tree ring tutorial is housed within: /pecan/documentation/tutorials/StateAssimilation More descriptive SDA methods can be found at: /pecan/book_source/adve_user_guide_web/SDA_Methods.Rmd 7.2.2.1 sda.enkf.R Description This is the main ensemble Kalman filter and generalized filter code. Originally, this was just ensemble Kalman filter code. Mike Dietze and Ann Raiho added a generalized ensemble filter to avoid filter divergence. The output of this function will be all the of run outputs, a PDF of diagnostics, and an Rdata object that includes three lists: FORECAST will be the ensemble forecasts for each year ANALYSIS will be the updated ensemble sample given the NPP observations enkf.params contains the prior and posterior mean vector and covariance matrix for each time step. 7.2.2.2 sda.enkf.R Arguments settings - (required) State Data Assimilation Tags Example settings object obs.mean - (required) a list of observation means named with dates in YYYY/MM/DD format obs.cov - (required) a list of observation covariances names with dates in YYYY/MM/DD format IC - (optional) initial condition matrix (dimensions: ensemble memeber # by state variables). Default is NULL. Q - (optional) process covariance matrix (dimensions: state variable by state variables). Default is NULL. 7.2.2.3 State Data Assimilation Workflow Before running sda.enkf, these tasks must be completed (in no particular order), Read in a State Data Assimilation Tags Example settings file with tags listed below. i.e. read.settings(‘pecan.SDA.xml’) Load data means (obs.mean) and covariances (obs.cov) as lists with PEcAn naming and unit conventions. Each observation must have a date in YYYY/MM/DD format (optional time) associated with it. If there are missing data, the date must still be represented in the list with an NA as the list object. Create initial conditions matrix (IC) that is state variables columns by ensemble members rows in dimension. sample.IC.MODEL can be used to create the IC matrix, but it is not required. This IC matrix is fed into write.configs for the initial model runs. The main parts of the SDA function are: Setting up for initial runs: Set parameters Load initial run inputs via split.inputs.MODEL Open database connection Get new workflow ids Create ensemble ids Performing the initial set of runs Set up for data assimilation Loop over time read.restart.MODEL - read model restart files corresponding to start.time and stop.time that you want to assimilate data into Analysis - There are four choices based on if process variance is TRUE or FALSE and if there is data or not. See explaination below. write.restart.MODEL - This function has two jobs. First, to insert adjusted state back into model restart file. Second, to update start.time, stop.time, and job.sh. run model Save outputs Create diagnostics 7.2.2.4 State Data Assimilation Tags Example <state.data.assimilation> <adjustment>TRUE</adjustment> <process.variance>FALSE</process.variance> <sample.parameters>FALSE</sample.parameters> <q.type>Single</q.type> <state.variables> <variable> <variable.name>AGB.pft</variable.name> <unit>MgC/ha/yr</unit> <min_value>0</min_value> <max_value>100000000</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>KgC/m^2</unit> <min_value>0</min_value> <max_value>100000000</max_value> </variable> </state.variables> <spin.up> <start.date>1950/01/01</start.date> <end.date>1960/12/31</end.date> </spin.up> <forecast.time.step>1</forecast.time.step> <start.date>1961/01/01</start.date> <end.date>2010/12/31</end.date> </state.data.assimilation> 7.2.2.5 State Data Assimilation Tags Descriptions adjustment : [optional] TRUE/FLASE flag for if ensembles needs to be adjusted based on weights estimated given their likelihood during analysis step. The Default is TRUE for this flag. process.variance : [optional] TRUE/FLASE flag for if process variance should be estimated (TRUE) or not (FALSE). If TRUE, a generalized ensemble filter will be used. If FALSE, an ensemble Kalman filter will be used. Default is FALSE. If you use the TRUE argument you can set three more optional tags to control the MCMCs built for the generalized esnsemble filter. nitrGEF : [optional] numeric defining the length of the MCMC chains. nthin : [optional] numeric defining thining length for the MCMC chains. nburnin : [optional] numeric defining the number of burnins during the MCMCs. q.type : [optional] If the process.variance is set to TRUE then this can take values of Single, Site or PFT. censored.data : [optional] logical set TRUE for censored state variables. sample.parameters : [optional] TRUE/FLASE flag for if parameters should be sampled for each ensemble member or not. This allows for more spread in the initial conditions of the forecast. state.variable : [required] State variable that is to be assimilated (in PEcAn standard format). spin.up : [required] start.date and end.date for initial model runs. NOTE: start.date and end.date are distinct from values set in the run tag because initial runs can be done over a subset of the full run. forecast.time.step : [optional] In the future, this will be used to allow the forecast time step to vary from the data time step. start.date : [optional] start date of the state data assimilation (in YYYY/MM/DD format) end.date : [optional] end date of the state data assimilation (in YYYY/MM/DD format) NOTE: start.date and end.date are distinct from values set in the run tag because this analysis can be done over a subset of the run. 7.2.2.6 Model Specific Functions for SDA Workflow 7.2.2.7 read.restart.MODEL.R The purpose of read.restart is to read model restart files and return a matrix that is site rows by state variable columns. The state variables must be in PEcAn names and units. The arguments are: outdir - output directory runid - ensemble member run ID stop.time - used to determine which restart file to read (in POSIX format) settings - pecan.SDA.xml settings object var.names - vector with state variable names with PEcAn standard naming. Example: c(‘AGB.pft’, ‘TotSoilCarb’) params - parameters used by ensemble member (same format as write.configs) 7.2.2.8 write.restart.MODEL.R This model specific function takes in new state and new parameter matrices from sda.enkf.R after the analysis step and translates new variables back to the model variables. Then, updates start.time, stop.time, and job.sh so that start_model_runs() does the correct runs with the new states. In write.restart.LINKAGES and write.restart.SIPNET, job.sh is updated by using write.configs.MODEL. outdir - output directory runid - run ID for ensemble member start.time - beginning of model run (in POSIX format) stop.time - end of model run (in POSIX format) settings - pecan.SDA.xml settings object new.state - matrix from analysis of updated state variables with PEcAn names (dimensions: site rows by state variables columns) new.params - In the future, this will allow us to update parameters based on states (same format as write.configs) inputs - model specific inputs from split.inputs.MODEL used to run the model from start.time to stop.time RENAME - [optional] Flag used in write.restart.LINKAGES.R for development. 7.2.2.9 split.inputs.MODEL.R This model specific function gives the correct met and/or other model inputs to settings\\(run\\)inputs. This function returns settings\\(run\\)inputs to an inputs argument in sda.enkf.R. But, the inputs will not need to change for all models and should return settings\\(run\\)inputs unchanged if that is the case. settings - pecan.SDA.xml settings object start.time - start time for model run (in POSIX format) stop.time - stop time for model run (in POSIX format) 7.2.2.10 sample.IC.MODEL.R This model specific function is optional. But, it can be used to create initial condition matrix (IC) with # state variables columns by # ensemble rows. This IC matrix is used for the initial runs in sda.enkf.R in the write.configs.MODEL function. ne - number of ensemble members state - matrix of state variables to get initial conditions from year - used to determine which year to sample initial conditions from 7.2.2.11 Analysis Options There are four options depending on whether process variance is TRUE/FALSE and whether or not there is data or not. If there is no data and process variance = FALSE, there is no analysis step. If there is no data and process variance = TRUE, process variance is added to the forecast. If there is data and process variance = TRUE, the generalized ensemble filter is implemented with MCMC. If there is data and process variance = FALSE, the Kalman filter is used and solved analytically. 7.2.2.12 The Generalized Ensemble Filter An ensemble filter is a sequential data assimilation algorithm with two procedures at every time step: a forecast followed by an analysis. The forecast ensembles arise from a model while the analysis makes an adjustment of the forecasts ensembles from the model towards the data. An ensemble Kalman filter is typically suggested for this type of analysis because of its computationally efficient analytical solution and its ability to update states based on an estimate of covariance structure. But, in some cases, the ensemble Kalman filter fails because of filter divergence. Filter divergence occurs when forecast variability is too small, which causes the analysis to favor the forecast and diverge from the data. Models often produce low forecast variability because there is little internal stochasticity. Our ensemble filter overcomes this problem in a Bayesian framework by including an estimation of model process variance. This methodology also maintains the benefits of the ensemble Kalman filter by updating the state vector based on the estimated covariance structure. This process begins after the model is spun up to equilibrium. The likelihood function uses the data vector \\(\\left(\\boldsymbol{y_{t}}\\right)\\) conditional on the estimated state vector \\(\\left(\\boldsymbol{x_{t}}\\right)\\) such that \\(\\boldsymbol{y}_{t}\\sim\\mathrm{multivariate\\:normal}(\\boldsymbol{x}_{t},\\boldsymbol{R}_{t})\\) where \\(\\boldsymbol{R}_{t}=\\boldsymbol{\\sigma}_{t}^{2}\\boldsymbol{I}\\) and \\(\\boldsymbol{\\sigma}_{t}^{2}\\) is a vector of data variances. To obtain an estimate of the state vector \\(\\left(\\boldsymbol{x}_{t}\\right)\\), we use a process model that incorporates a process covariance matrix \\(\\left(\\boldsymbol{Q}_{t}\\right)\\). This process covariance matrix differentiates our methods from past ensemble filters. Our process model contains the following equations \\(\\boldsymbol{x}_{t} \\sim \\mathrm{multivariate\\: normal}(\\boldsymbol{x}_{model_{t}},\\boldsymbol{Q}_{t})\\) \\(\\boldsymbol{x}_{model_{t}} \\sim \\mathrm{multivariate\\: normal}(\\boldsymbol{\\mu}_{forecast_{t}},\\boldsymbol{P}_{forecast_{t}})\\) where \\(\\boldsymbol{\\mu}_{forecast_{t}}\\) is a vector of means from the ensemble forecasts and \\(\\boldsymbol{P}_{forecast_{t}}\\) is a covariance matrix calculated from the ensemble forecasts. The prior for our process covariance matrix is \\(\\boldsymbol{Q}_{t}\\sim\\mathrm{Wishart}(\\boldsymbol{V}_{t},n_{t})\\) where \\(\\boldsymbol{V}_{t}\\) is a scale matrix and \\(n_{t}\\) is the degrees of freedom. The prior shape parameters are updated at each time step through moment matching such that \\(\\boldsymbol{V}_{t+1} = n_{t}\\bar{\\boldsymbol{Q}}_{t}\\) \\(n_{t+1} = \\frac{\\sum_{i=1}^{I}\\sum_{j=1}^{J}\\frac{v_{ijt}^{2}+v_{iit}v_{jjt}}{Var(\\boldsymbol{\\bar{Q}}_{t})}}{I\\times J}\\) where we calculate the mean of the process covariance matrix \\(\\left(\\bar{\\boldsymbol{Q}_{t}}\\right)\\) from the posterior samples at time t. Degrees of freedom for the Wishart are typically calculated element by element where \\(v_{ij}\\) are the elements of \\(\\boldsymbol{V}_{t}\\). \\(I\\) and \\(J\\) index rows and columns of \\(\\boldsymbol{V}\\). Here, we calculate a mean number of degrees of freedom for \\(t+1\\) by summing over all the elements of the scale matrix \\(\\left(\\boldsymbol{V}\\right)\\) and dividing by the count of those elements \\(\\left(I\\times J\\right)\\). We fit this model sequentially through time in the R computing environment using R package ‘rjags.’ Users have control over how they think is the best way to estimate \\(Q\\). Our code will look for the tag q.type in the XML settings under state.data.assimilation which can take 4 values of Single, Site, Vector, or Wishart. If q.type is set to single then one value of process variance will be estimated across all different sites and variables. When q.type is set to Site then a process variance will be estimated for each siteat a cost of more time and computation power. When the q.type is set to Vector or Wishart then process errors for each variable of each site will be estimated and propagated through time, while the Wishart Q support the estimation of covariance between sites and variables through the MCMC sampling of wishart distributions, which further support the propagation of process error through not just time but space and variables. 7.2.2.13 Multi-site State data assimilation. sda.enkf.multisite is housed within: /pecan/modules/assim.sequential/R The 4-site tutorial is housed within: ~/pecan/modules/assim.sequential/inst/MultiSite-Exs 7.2.2.14 sda.enkf.multisite.R Description sda.enkf.multisite function allows for assimilation of observed data at multiple sites at the same time. In order to run a multi-site SDA, one needs to send a multisettings pecan xml file to this function. This multisettings xml file needs to contain information required for running at least two sites under run tag. The code will automatically run the ensembles for all the sites and reformats the outputs matching the required formats for analysis step. #### sda.enkf.multisite.R Arguments * settings - (required) State Data Assimilation Tags Example settings object obs.mean - (required) Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point. obs.cov - (required) Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point. Q - (optional) Process covariance matrix given if there is no data to estimate it. Default is NULL. restart - (optional) Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA. Default is NULL. pre_enkf_params - (optional) Used for carrying out SDA with pre-existed enkf.params, in which the Pf, aqq, and bqq can be used for the analysis step. Default is NULL. ensemble.samples - (optional) Pass ensemble.samples from outside, which are lists of calibrated parameters for different plant functional types. This also helps to avoid GitHub check issues. Default is NULL. control - (optional) List of flags controlling the behavior of the SDA. Here is an example of the control list. The detailed explanation of them are shown inside the sda.enkf.multisite function in the assim.sequential package. control=list(trace = TRUE, TimeseriesPlot = FALSE, debug = FALSE, pause = FALSE, Profiling = FALSE, OutlierDetection=FALSE, parallel_qsub = TRUE, send_email = NULL, keepNC = TRUE, run_parallel = TRUE, MCMC.args = NULL) 7.2.2.15 obs.mean and obs.cov Description The observations are required for passing the time points into the SDA workflow, which should match the start and end date in the settings object. For the generations of obs.mean and obs.cov, please use the function SDA_OBS_Assembler inside the assim.sequential package. For the unconstrained runs, please specify the free.run flag as TRUE inside the settings$state.data.assimilation$Obs_Prep section. Otherwise, please specify the arguments that are needed for preparations of different observations (note that, the observation preparation functions currently only support MODIS LAI, Landtrendr AGB, SMAP soil moisture, and SoilGrid soil organic carbon). For the details about how to setup those arguments, please reference the Create_Multi_settings.R script inside ~/pecan/modules/assim.sequential/inst/MultiSite-Exs/SDA directory. The observed mean and covariance need to be formatted as list of different dates with observations. For each element of this list also there needs to be a list with mean and cov matrices of different sites named by their siteid. In case that zero variance was estimated for a variable inside the obs.cov, the SDA code will automatically replace that with half of the minimum variance from other non-zero variables in that time step. Here are examples of the obs.mean and obs.cov for single time point, two sites, and two observations. > obs.mean $`2010/12/31` $`2010/12/31`$`1000000650` AbvGrndWood GWBI 111.502 1.0746 $`2010/12/31`$`1000000651` AbvGrndWood GWBI 114.302 1.574695 > obs.cov $`2010/12/31` $`2010/12/31`$`1000000650` [,1] [,2] [1,] 19.7821691 0.213584319 [2,] 0.5135843 0.005162113 $`2010/12/31`$`1000000651` [,1] [,2] [1,] 15.2821691 0.513584319 [2,] 0.1213583 0.001162113 7.2.2.16 Anlysis SDA workflow Before running the SDA analysis functions, the ensemble forecast results have to be generated, and arguments such as H matrix, MCMC arguments, and multi-site Y and R (by Construct.R function) have to be generated as well. Here are the workflows for three types of SDA analysis functions that we are currently used. Decide which analysis function to be used. Here we have three options: 1) traditional ensemble Kalman Filter (EnKF.MultiSite) with analytical solution, within which the process error needs to be prescribed from outside (see Q arguments in the sda.enkf.multisite function); 2) generalized ensemble Kalman Filter (GEF.MultiSite); and 3) block-based generalized ensemble Kalman Filter (analysis_sda_block). The latter two methods support the feature of propagating process variance across space and time. To choose the analysis method 1, we need to set the process.variance as FALSE. Otherwise, if we set the process.variance as TRUE and provide the q.type as either SINGLE or SITE the method GEF.MultiSite will be used, and if we provide the q.type as either vector or wishart the method analysis_sda_block will be used. The explanations for different Q types can be found in the The Generalized Ensemble Filter section in this documentation. For the analysis_sda_block method, there is also a special case for complete or partial missing of observations. If we decide to use EnKF.MultiSite, then the analysis results will be calculated based on equations. If we decide to use GEF.MultiSite, then it will first do censoring process based on how you setup the censored.data flag within settings xml file. Then, if t equals to 1, we will first initialize the aqq, bqq, aq, and bq based on how you setup the q.type argument within settings xml file. After preparing the initial conditions (ICs), data, and constants for the GEF.MultiSite.Nimble nimble model, the MCMC sampling will happen afterwards. Finally, the process variance and the analysis results will be calculated and updated and returned to the sda.enkf.multisite function. If we decide to use analysis_sda_block method, then if t equals 1, the workflow will first build blocks using the matrix_network function for the calculations for indexes of variables by networks of site groups based on the spatial scale (see scalef argument in the Example of multi-settings pecan xml file section) that we specify inside the state.data.assimilation section. Then, the workflow will execute the build.block.xy to automatically initialize the overall block-based MCMC lists (block.list.all) and fill in datasets (mu.f, y.censored, or Pf) for each block and for all time points to facilitate the process of passing arguments between scripts/functions. After that, the MCMC_args (see the explanations inside the roxygen structure of the sda.enkf.multisite function) will be specified either from the control list or by default (see below). Then, the workflow will update the process error using the update_q function. If t equals 1, it will initialize the arguments for the process error. Otherwise, it will grab the previously updated process error. After that, in the MCMC_Init function, it will create the initial conditions (ICs) for the MCMC sampling based on randomly sampled data. Then, the completed block-based arguments (block.list.all) will be used by the MCMC_block_function function under the parallel mode. Finally, the block-based results will be converted into long vectors and large matrices by the block.2.vector function and values such as block.list.all, mu.f, mu.a, Pf, Pa will be returned to the sda.enkf.multisite function. MCMC.args <- list(niter = 1e5, nthin = 10, nchain = 3, nburnin = 1e4) There is a special case for the analysis_sda_block method where NA values appear in the observations, which provides the opportunity for the estimations of process error without any observations. This special case currently is only working under restricted conditions when we set the scalef as 0 (that feature currently only works for isolated-site SDA runs) and turn on the free.run flag in the settings, which will then automatically fill in NA values to the observations by each site (see bellow). <state.data.assimilation> <scalef>0</scalef> <free.run>TRUE</free.run> </state.data.assimilation> 7.2.2.17 Example of multi-settings pecan xml file Here is an example of what does a multi-settings pecan xml file look like. The detailed explanation for the xml file can be found under the Multi-Settings section in the 03_pecan_xml.Rmd documentation. <?xml version="1.0"?> <pecan.multi> <state.data.assimilation> <process.variance>TRUE</process.variance> <aqq.Init>1</aqq.Init> <bqq.Init>1</bqq.Init> <sample.parameters>FALSE</sample.parameters> <adjustment>TRUE</adjustment> <censored.data>FALSE</censored.data> <FullYearNC>TRUE</FullYearNC> <NC.Overwrite>FALSE</NC.Overwrite> <NC.Prefix>sipnet.out</NC.Prefix> <q.type>SINGLE</q.type> <free.run>FALSE</free.run> <Localization.FUN>Local.support</Localization.FUN> <scalef>1</scalef> <chains>5</chains> <state.variables> <variable> <variable.name>AbvGrndWood</variable.name> <unit>MgC/ha</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>LAI</variable.name> <unit></unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SoilMoistFrac</variable.name> <unit></unit> <min_value>0</min_value> <max_value>100</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>kg/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> </state.variables> <Obs_Prep> <Landtrendr_AGB> <AGB_input_dir>/projectnb/dietzelab/dongchen/Multi-site/download_500_sites/AGB</AGB_input_dir> <allow_download>TRUE</allow_download> <export_csv>TRUE</export_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </Landtrendr_AGB> <MODIS_LAI> <search_window>30</search_window> <export_csv>TRUE</export_csv> <run_parallel>TRUE</run_parallel> <timestep> <unit>year</unit> <num>1</num> </timestep> </MODIS_LAI> <SMAP_SMP> <search_window>30</search_window> <export_csv>TRUE</export_csv> <update_csv>FALSE</update_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </SMAP_SMP> <Soilgrids_SoilC> <timestep> <unit>year</unit> <num>1</num> </timestep> </Soilgrids_SoilC> <outdir>/projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS</outdir> <start.date>2012-07-15</start.date> <end.date>2021-07-15</end.date> </Obs_Prep> <spin.up> <start.date>2004/01/01</start.date> <end.date>2006/12/31</end.date> </spin.up> <forecast.time.step>1</forecast.time.step> <start.date>2004/01/01</start.date> <end.date>2006/12/31</end.date> </state.data.assimilation> <info> <notes></notes> <userid>-1</userid> <username></username> <date>2017/12/06 21:19:33 +0000</date> </info> <outdir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768</outdir> <database> <bety> <user>bety</user> <password>bety</password> <host>128.197.168.114</host> <dbname>bety</dbname> <driver>PostgreSQL</driver> <write>false</write> </bety> <dbfiles>/fs/data1/pecan.data/dbfiles/</dbfiles> </database> <pfts> <pft> <name>temperate.deciduous_SDA</name> <constants> <num>2</num> </constants> <outdir>/fs/data2/output//PEcAn_1000008768/pft/temperate.deciduous_SDA</outdir> <posteriorid>1000008552</posteriorid> </pft> </pfts> <meta.analysis> <iter>3000</iter> <random.effects> <on>FALSE</on> <use_ghs>TRUE</use_ghs> </random.effects> </meta.analysis> <ensemble> <size>20</size> <ensemble.id>1000016146</ensemble.id> <start.year>1995</start.year> <end.year>1999</end.year> <samplingspace> <parameters> <method>uniform</method> </parameters> <met> <method>sampling</method> </met> <soil> <parent>parameters</parent> </soil> <vegetation> <parent>soil</parent> </vegetation> </samplingspace> </ensemble> <model> <id>1000000022</id> <default.param>/fs/data3/hamzed/output/paleon_sda_SIPNET-8768/Bartlett.param</default.param> <type>SIPNET</type> <revision>r136</revision> <delete.raw>FALSE</delete.raw> <binary>/fs/data5/pecan.models/SIPNET/trunk/sipnet_ssr</binary> </model> <workflow> <id>1000008768</id> </workflow> <run> <settings.1000000650> <site> <id>1000000650</id> <met.start>1960/01/01</met.start> <met.end>1965/12/31</met.end> <name>Harvard Forest - Lyford Plots (PalEON PHA)</name> <lat>42.53</lat> <lon>-72.18</lon> </site> <inputs> <met> <source>CRUNCEP</source> <output>SIPNET</output> <path> <path1>/fs/data1/pecan.data/dbfiles/CRUNCEP_SIPNET_site_0-758/CRUNCEP.1960-01-01.2010-12-31.clim</path1> </path> </met> </inputs> <start.date>1960/01/01</start.date> <end.date>1980/12/31</end.date> </settings.1000000650> <settings.1000000651> <site> <id>1000000651</id> <met.start>1960/01/01</met.start> <met.end>1965/12/31</met.end> <name>Harvard Forest - Lyford Plots (PalEON PHA)</name> <lat>42.53</lat> <lon>-72.18</lon> </site> <inputs> <met> <source>CRUNCEP</source> <output>SIPNET</output> <path> <path1>/fs/data1/pecan.data/dbfiles/CRUNCEP_SIPNET_site_0-758/CRUNCEP.1960-01-01.2010-12-31.clim</path1> </path> </met> </inputs> <start.date>1960/01/01</start.date> <end.date>1980/12/31</end.date> </settings.1000000651> </run> <host> <name>localhost</name> <rundir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768/run</rundir> <outdir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768/out</outdir> </host> <settings.info> <deprecated.settings.fixed>TRUE</deprecated.settings.fixed> <settings.updated>TRUE</settings.updated> <checked>TRUE</checked> </settings.info> <rundir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768/run</rundir> <modeloutdir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768/out</modeloutdir> <multisettings>run</multisettings> </pecan.multi> 7.2.3 Running SDA on remote In general, the idea is that sending, running and monitoring an SDA job should all be done using two functions (SDA_remote_launcher and Remote_Sync_launcher). SDA_remote_launcher checks the XML settings defining the run, sets up the SDA on the remote machine, and then sends a qusb command for running the job. Remote_Sync_launcher, on the other hand, sits on the local machine and monitors the progress of the job(s) and brings back the outputs as long as the job is running. SDA_remote_launcher sets up the job by copying a template SDA workflow R script and a bash file template that are ready for submission to the remote machine. This function checks the paths to all inputs including met, soil, site_pft and etc., testing whether they exists on the remote machine or not. If they do not exist, the function copies the missing paths over and replaces the settings accordingly. After submitting the bash script, the function returns the PID of the job writing the log file, allowing the Remote_Sync_launcher to monitor the progress of the run, checks to see if the job is still alive, and determines if sda.output.rdata has been updated since the last check or not. Additionally, the Remote_Sync_launcher function follows the progress of the remote job by executing a nohup command on a template R script and keeps the R console open for further use. This R script, as mentioned above, constantly pings the given PID every 5 minutes and copies over the SDA output. Several points on how to prepare your xml settings for the remote SDA run: 1 - In the main pecan workflow.R, if you were able to generate pecan.TRAIT.xml, your settings are ready to be used for an SDA run. All you need to add is your state data assimilation tags. 2 - Inside the xml setting an <outdir> flag needs to be included and point to a local directory where SDA_remote_launcher will look for either a sample.Rdata file or a pft folder. 3 - You need to set your <host> tag according to the desired remote machine. You can learn more about this on the Remote execution with PEcAn section of the documentation. Please make sure that the <folder> tag inside <host> is pointing to a directory where you would like to store and run your SDA job(s). 4 - Finally, make sure the tag inside the tag is set to the correct path on the remote machine. 7.2.4 Restart functionality in SDA If you prefer to run your SDA analysis in multiple stages, where each phase picks up where the previous one left off, you can use the restart argument in the sda.enkf.multisite function. You need to make sure that the output from previous step exists in the SDA folder (in the outfolder), and the <start.date> is the same as <end.date> from the previous step. When you run the SDA with the restart parameter, it will load the output from the previous step and use configs already written in the run folder to set itself up for the next step. Using the restart argument could be as easy as : sda.enkf.multisite(settings, obs.mean =obs.mean , obs.cov = obs.cov, control = SDA.arguments(debug = FALSE, TimeseriesPlot = FALSE), restart = FALSE ) Where the new settings, obs.mean and obs.cov contain the relevant information for the next phase. 7.2.5 State Data Assimilation Methods By Ann Raiho Our goal is build a fully generalizable state data assimilation (SDA) workflow that will assimilate multiple types of data and data products into ecosystem models within PEcAn temporally and spatially. But, during development, specifically with PalEON goals in mind, we have been focusing on assimilating tree ring estimated NPP and AGB and pollen derived fractional composition into two ecosystem models, SIPNET and LINKAGES, at Harvard Forest. This methodology will soon be expanded to include the PalEON sites listed on the state data assimilation wiki page. 7.2.5.1 Data Products During workflow development, we have been working with tree ring estimated NPP and AGB and pollen derived fractional composition data products. Both of these data products have been estimated with a full accounting of uncertainty, which provides us with state variable observation mean vector and covariance matrix at each time step. These data products are discussed in more detail below. Even though we have been working with specific data products during development, our workflow is generalizable to alternative data products as long as we can calculate a state variable observation mean vector and covariance for a time point. 7.2.5.2 Tree Rings We have been primarily been working with the tree ring data product created by Andria Dawson and Chris Paciorek and the PEcAn tree ring allometry module. They have developed a Bayesian model that estimates annual aboveground biomass increment (Mg/ha/yr) and aboveground biomass (Mg/ha) for each tree in a dataset. We obtain this data and aggregate to the level appropriate for the ecosystem model. In SIPNET, we are assimilating annual gross woody increment (Mg/ha/yr) and above ground woody biomass (Mg/ha). In LINKAGES, we are assimilating annual species biomass. More information on deriving these tree ring data products can be found in Dawson et al 201?. We have been working mostly with tree data collected at Harvard Forest. Tree rings and census data were collected at Lyford Plot between 1960 and 2010 in three separate plots. Other tree ring data will be added to this analysis in the future from past PEON courses (UNDERC), Kelly Heilman (Billy’s Lake and Bigwoods), and Alex Dye (Huron Mt. Club). 7.2.5.3 Pollen STEPPS is a Bayesian model developed by Paciorek and McLachlan 2009 and Dawson et al 2016 to estimate spatially gridded fractional composition from fossil pollen. We have been working with STEPPS1 output, specifically with the grid cell that contains Harvard Forest. The temporal resolution of this data product is centennial. Our workflow currently operates at annual time steps, but does not require data at every time step. So, it is possible to assimilate fractional composition every one hundred years or to assimilate fractional composition data every year by accounting for variance inflation. In the future, pollen derived biomass (ReFAB) will also be available for data assimilation. Although, we have not discussed how STEPPS and ReFAB data assimilation will work. 7.2.5.4 Variance Inflation *Side Note: Probably want to call this something else now. Since the fractional composition data product has a centennial resolution, in order to use fractional composition information every year we need to change the weight the data has on the analysis. The basic idea is to downweight the likelihood relative to the prior to account for (a) the fact that we assimilate an observation multiple times and (b) the fact that the number of STEPPS observations is ‘inflated’ because of the autocorrelation. To do this, we take the likelihood and raise it to the power of (1/w) where ‘w’ is an inflation factor. w = D * (N / ESS) where D is the length of the time step. In our case D = 100. N is the number of time steps. In our case N = 11. and ESS is the effective sample size. The ESS is calculated with the following function where ntimes is the same as N above and sims is a matrix with the dimensions number of MCMC samples by number of state variables. ESS_calc <- function(ntimes, sims){ # center based on mean at each time to remove baseline temporal correlation # (we want to estimate effective sample size effect from correlation of the errors) row.means.sims <- sims - rowMeans(sims) # compute all pairwise covariances at different times covars <- NULL for(lag in 1:(ntimes-1)){ covars <- c(covars, rowMeans(row.means.sims[(lag+1):ntimes, , drop = FALSE] * row.means.sims[1:(ntimes-lag), , drop = FALSE])) } vars <- apply(row.means.sims, 1, var) # pointwise post variances at each time, might not be homoscedastic # nominal sample size scaled by ratio of variance of an average # under independence to variance of average of correlated values neff <- ntimes * sum(vars) / (sum(vars) + 2 * sum(covars)) return(neff) } The ESS for the STEPPS1 data product is 3.6, so w in our assimilation of fractional composition at Harvard Forest will be w = 305.6. 7.2.5.5 Current Models SIPNET and LINKAGES are the two ecosystem models that have been used during state data assimilation development within PEcAn. SIPNET is a simple ecosystem model that was built for… LINKAGES is a forest gap model created to simulate the process of succession that occurs when a gap is opened in the forest canopy. LINKAGES has 72 species level plant functional types and the ability to simulate some below ground processes (C and N cycles). 7.2.5.6 Model Calibration Without model calibration both SIPNET and LINKAGES make incorrect predictions about Harvard Forest. To confront this problem, SIPNET and LINKAGES will both be calibrated using data collected at the Harvard Forest flux tower. Istem has completed calibration for SIPNET using a parameter data assimilation emulator contained within the PEcAn workflow. LINKAGES will also be calibrated using this method. This method is also generalizable to other sites assuming there is data independent of data assimilation data available to calibrate against. 7.2.5.7 Initial Conditions The initial conditions for SIPNET are sampled across state space based on data distributions at the time when the data assimilation will begin. We do not sample LINAKGES for initial conditions and instead perform model spin up for 100 years prior to beginning data assimilation. In the future, we would like to estimate initial conditions based on data. We achieve adequate spread in the initial conditions by allowing the parameters to vary across ensemble members. 7.2.5.8 Drivers We are currently using Global Climate Model (GCM) drivers from the PaLEON model intercomparison. Christy Rollinson and John Tipton are creating MET downscaled GCM drivers for the Paleon data assimilation sites. We will use these drivers when they are available because they are a closer representation of reality. 7.2.5.9 Sequential State Data Assimilation We are using sequential state data assimilation methods to assimilate paleon data products into ecosystem models because less computation power is required for sequential state data assimilation than for particle filter methods. 7.2.5.10 General Description The general sequential data assimilation framework consists of three steps at each time step: 1. Read the state variable output for time t from the model forecast ensembles and save the forecast mean (muf) and covariance (Pf). 2. If there are data mean (y) and covariance (R) at this time step, perform data assimilation analysis (either EnKF or generalized ensemble filter) to calculate the new mean (mua) and covariance (Pa) of the state variables. 3. Use mua and Pa to restart and run the ecosystem model ensembles with new state variables for time t+1. 7.2.5.11 EnKF There are two ways to implement sequential state data assimilation at this time. The first is the Ensemble Kalman Filter (EnKF). EnKF has an analytical solution, so the kalman gain, analysis mean vector, and analysis covariance matrix can be calculated directly: K <- Pf %*% t(H) %*% solve((R + H %*% Pf %*% t(H))) ## Kalman Gain mu.a <- mu.f + K %*% (Y - H %*% mu.f) # Analysis mean vector Pa <- (diag(ncol(X)) - K %*% H) %*% Pf # Analysis covariance matrix The EnKF is typically used for sequential state data assimilation, but we found that EnKF lead to filter divergence when combined with our uncertain data products. Filter divergence led us to create a generalized ensemble filter that estimates process variance. 7.2.5.12 Generalized Ensemble Filter The generalized ensemble filter follows generally the three steps of sequential state data assimilation. But, in the generalized ensemble filter we add a latent state vector that accounts for added process variance. Furthermore, instead of solving the analysis analytically like the EnKF, we have to estimate the mean analysis vector and covariance matrix with MCMC. 7.2.5.13 Mapping Ensemble Output to Tobit Space There are some instances when we have right or left censored variables from the model forecast. For example, a model estimating species level biomass may have several ensemble members that produce zero biomass for a given species. We are considering this case a left censored state variable that needs to be mapped to normal space using a tobit model. We do this by creating two matrices with dimensions number of ensembles by state variable. The first matrix is a matrix of indicator variables (y.ind), and the second is a matrix of censored variables (y.censored). When the indicator variable is 0 the state variable (j) for ensemble member (i) is sampled. This allows us to impute a normal distribution for each state variable that contains ‘missing’ forecasts or forecasts of zero. tobit2space.model <- nimbleCode({ for(i in 1:N){ y.censored[i,1:J] ~ dmnorm(muf[1:J], cov = pf[1:J,1:J]) for(j in 1:J){ y.ind[i,j] ~ dconstraint(y.censored[i,j] > 0) } } muf[1:J] ~ dmnorm(mean = mu_0[1:J], cov = pf[1:J,1:J]) Sigma[1:J,1:J] <- lambda_0[1:J,1:J]/nu_0 pf[1:J,1:J] ~ dinvwish(S = Sigma[1:J,1:J], df = J) }) 7.2.5.14 Generalized Ensemble Filter Model Description Below is the BUGS code for the full analysis model. The forecast mean an covariance are calculated from the tobit2space model above. We use a tobit likelihood in this model because there are instances when the data may be left or right censored. Process variance is included by adding a latent model state (X) with a process precision matrix (q). We update our prior on q at each time step using our estimate of q from the previous time step. tobit.model <- nimbleCode({ q[1:N,1:N] ~ dwish(R = aq[1:N,1:N], df = bq) ## aq and bq are estimated over time Q[1:N,1:N] <- inverse(q[1:N,1:N]) X.mod[1:N] ~ dmnorm(muf[1:N], prec = pf[1:N,1:N]) ## Model Forecast ##muf and pf are assigned from ensembles ## add process error X[1:N] ~ dmnorm(X.mod[1:N], prec = q[1:N,1:N]) #agb linear #y_star[1:YN,1:YN] <- X[1:YN,1:YN] #[choose] #f.comp non linear #y_star[1:YN] <- X[1:YN] / sum(X[1:YN]) ## Analysis y.censored[1:YN] ~ dmnorm(X[1:YN], prec = r[1:YN,1:YN]) #is it an okay assumpution to just have X and Y in the same order? #don't flag y.censored as data, y.censored in inits #remove y.censored samplers and only assign univariate samplers on NAs for(i in 1:YN){ y.ind[i] ~ dconstraint(y.censored[i] > 0) } }) 7.2.5.15 Ensemble Adjustment Each ensemble member has a different set of species parameters. We adjust the updated state variables by using an ensemble adjustment. The ensemble adjustment weights the ensemble members based on their likelihood during the analysis step. S_f <- svd(Pf) L_f <- S_f$d V_f <- S_f$v ## normalize Z <- X*0 for(i in seq_len(nrow(X))){ Z[i,] <- 1/sqrt(L_f) * t(V_f)%*%(X[i,]-mu.f) } Z[is.na(Z)]<-0 ## analysis S_a <- svd(Pa) L_a <- S_a$d V_a <- S_a$v ## analysis ensemble X_a <- X*0 for(i in seq_len(nrow(X))){ X_a[i,] <- V_a %*%diag(sqrt(L_a))%*%Z[i,] + mu.a } 7.2.5.16 Diagnostics There are three diagnostics we have currently implemented: time series, bias time series, and process variance. The time series diagnostics show the data, forecast, and analysis time series for each state variable. These are useful for visually assessing variance and magnitude of change of state variables through time. These time series are also updated throughout the analysis and are also created as a pdf at the end of the SDA workflow. There are two types of bias time series the first assess the bias in the update (the forecast minus the analysis) and the second assess the bias in the error (the forecast minus the data). These bias time series are useful for identifying which state variables have intrinsic bias within the model. For example, if red oak biomass in LINKAGES increases at every time step (the update and the error are always positive), this would suggest that LINKAGES has a positive growth or recruitment bias for red oak. Finally, when using the generalized ensemble filter to estimate process variance, there are two additional plots to assess estimation of process variance. The first is a correlation plot of the process covariance matrix. This tells us what correlations are incorrectly represented by the model. For example, if red oak biomass and white pine biomass are highly negatively correlated in the process covariance matrix, this means that the model either 1) has no relationship between red oak and white pine and they should affect each other negatively or 2) there is a positive relationship between red oak and white pine and there shouldn’t be any relationship. We can determine which of these is true by comparing the process covariance matrix to the model covariance matrix. The second process variance diagnostic plot shows how the degrees of freedom associated with estimating the process covariance matrix have changed through time. This plot should show increasing degrees of freedom through time. 7.2.6 MultiSettings (TODO: Under construction…) 7.2.7 Benchmarking Benchmarking is the process of comparing model outputs against either experimental data or against other model outputs as a way to validate model performance. We have a suit of statistical comparisons that provide benchmarking scores as well as visual comparisons that help in diagnosing data-model and/or model-model differences. 7.2.7.1 Data Preparation All data that you want to compare with model runs must be registered in the database. This is currently a step that must be done by hand either from the command line or through the online BETY interface. The data must have three records: An input record (Instructions here) A database file record (Instructions here) A format record (Instructions here) 7.2.7.2 Model Runs Model runs can be setup and executed - Using the PEcAn web interface online or with a VM (see setup) - By hand using the pecan.xml 7.2.7.3 The Benchmarking Shiny App The entire benchmarking process can be done through the Benchmarking R Shiny app. When the model run has completed, navigate to the workflow visualization Shiny app. Load model data Select the workflow and run id Make sure that your model output is loading properly (i.e. you can see plots of your data) Load benchmarking data Again make sure that you can see the uploaded data plotted alongside the model output. In the future there will be more tools for double checking that your uploaded data is appropriate for benchmarking, but for now you may need to do the sanity checks by hand. 7.2.7.4 Create a reference run record Navigate to the Benchmarking tab The first step is to register the new model run as a reference run in the database. Benchmarking cannot be done before this step is completed. When the reference run record has been created, additional menus for benchmarking will appear. 7.2.7.5 Setup Benchmarks and metrics From the menus select The variables in the uploaded data that you wish to compare with model output. The numerical metrics you would like to use in your comparison. Additional comparison plots that you would like to see. Note: All these selections populate the benchmarking section of the pecan.BENCH.xml which is then saved in the same location as the original run output. This xml is purely for reference. 7.2.7.5.1 Benchmarking Output All benchmarking results are stored in the benchmarking directory which is created in the same folder as the original model run. The benchmaking directory contains subdirectories for each of the datasets compared with the model output. The names of these directories are the same as the corresponding data set’s input id in BETY. Each input directory contains benchmarking.output.Rdata, an Rdata file contianing all the results of the benchmarking workflow. load(benchmarking.output.Rdata) loads a list called result.out which contains the following: bench.results: a data frame of all numeric benchmarking scores format: a data frame that can be used to see how the input data was transformed to make it comparable to the model output. This involves converting from the original variable names and units to the internal pecan standard. aligned.dat: a data frame of the final aligned model and input values. All plots are saved as pdf files with names with “benchmark_plot-type_variable_input-id.pdf” To view interactive results, naviage to the Benchmarking Plots tab in the shiny app. 7.2.7.6 Benchmarking in pecan.xml Before reading this section, it is recommended that you familiarize yourself with basics of the pecan.xml file. The pecan.xml has an optional benchmarking section. Below are all the tags in the benchmarking section explained. Many of these field are filled in automatically during the benchmarking process when using the benchmarking shiny app. The only time one should edit the benchmarking section by hand is for performing clone runs. See clone run documentation. <benchmarking> settings: ensemble_id: the id of the ensemble that you will be using - the settings from this ensemble will be saved in a reference run record and then ensemble_id will be replaced with reference_run_id new_run: TRUE = create new run, FALSE = use existing run (required, default FALSE) It is possible to look at more than one benchmark with a particular run. The specific settings related to each benchmark are in a sub section called benchmark input_id: the id of the benchmarking data (required) variable_id: the id of the variable of interest within the data. If you leave this blank, all variables that are shared between the input and model output will be used. metric_id: the id(s) of the metric(s) to be calculated. If you leave this blank, all metrics will be used. Example: In this example, - we are using a pre-existing run from ensemble_id = 1000010983 (new_run = FALSE) - the output will be compared to data from input_id = 1000013743, specifically two variables of interest: variable_id = 411, variable_id = 18 - for variable_id = 411 we will perform only one metric of comparison metric_id = 1000000001 - for for variable_id = 18 we will perform two metrics of comparison metric_id = 1000000001, metric_id = 1000000002 <benchmarking> <ensemble_id>1000010983</ensemble_id> <new_run>FALSE</new_run> <benchmark> <input_id>1000013743</input_id> <variable_id>411</variable_id> <site_id>853</site_id> <metrics> <metric_id>1000000001</metric_id> </metrics> </benchmark> <benchmark> <input_id>1000013743</input_id> <variable_id>18</variable_id> <site_id>853</site_id> <metrics> <metric_id>1000000001</metric_id> <metric_id>1000000002</metric_id> </metrics> </benchmark> </benchmarking> "],["developer-guide.html", "8 Developer guide", " 8 Developer guide Update BETY Update PEcAn Code PEcAn and Git Coding Practices "],["updatebety.html", "8.1 Updating PEcAn Code and Bety Database", " 8.1 Updating PEcAn Code and Bety Database Release notes for all releases can be found here. This page will only list any steps you have to do to upgrade an existing system. When updating PEcAn it is highly encouraged to update BETY. You can find instructions on how to do this, as well on how to update the database in the Updating BETYdb gitbook page. 8.1.1 Updating PEcAn The latest version of PEcAn code can be obtained from the PEcAn repository on GitHub: cd pecan # If you are not already in the PEcAn directory git pull The PEcAn build system is based on GNU Make. The simplest way to install is to run make from inside the PEcAn directory. This will update the documentation for all packages and install them, as well as all required dependencies. For more control, the following make commands are available: make document – Use devtools::document to update the documentation for all package. Under the hood, this uses the roxygen2 documentation system. make install – Install all packages and their dependnencies using devtools::install. By default, this only installs packages that have had their code changed and any dependent packages. make check – Perform a rigorous check of packages using devtools::check make test – Run all unit tests (based on testthat package) for all packages, using devtools::test make clean – Remove the make build cache, which is used to track which packages have changed. Cache files are stored in the .doc, .install, .check, and .test subdirectories in the PEcAn main directory. Running make clean will force the next invocation of make commands to operate on all PEcAn packages, regardless of changes. The following are some additional make tricks that may be useful: Install, check, document, or test a specific package – make .<cmd>/<pkg-dir>; e.g. make .install/utils or make .check/modules/rtm Force make to run, even if package has not changed – make -B <command> Run make commands in parallel – make -j<ncores>; e.g. make -j4 install to install packages using four parallel processes. Note that packages containing compiled code (e.g. PEcAn.RTM, PEcAn.BASGRA) might fail when j is greater than 1, because of limitations in the way R calls make internally while compiling them. See GitHub issue 1976 for more details. All instructions for the make build system are contained in the Makefile in the PEcAn root directory. For full documentation on make, see the man pages by running man make from a terminal. "],["pecan-git.html", "8.2 Git and GitHub Workflow", " 8.2 Git and GitHub Workflow Using Git 8.2.1 Using Git This document describes the steps required to download PEcAn, make changes to code, and submit your changes. If you are new to GitHub or to PEcAn, start with the one-time set-up instructions under Before any work is done. Also see the excellent tutorials and references in the Git) section right below this list and at the bootom in References. To make trivial changes, see [Quick and Easy]. To make a few changes to the code, start with the [Basic Workflow]. To make substantial changes and/or if plan to contribute over time see [Recommended Workflow: A new branch for each change]. 8.2.1.1 Git Git is a free & open source, distributed version control system designed to handle everything from small to very large projects with speed and efficiency. Every Git clone is a full-fledged repository with complete history and full revision tracking capabilities, not dependent on network access or a central server. Branching and merging are fast and easy to do. A good place to start is the GitHub 5 minute illustrated tutorial. In addition, there are three fun tutorials for learning git: Learn Git is a great web-based interactive tutorial. LearnGitBranching TryGit. URLs In the rest of the document will use specific URL’s to clone the code. There a few URL’s you can use to clone a project, using https, ssh and git. You can use either https or git to clone a repository and write to it. The git protocol is read-only. This document describes the steps required to download PEcAn, make changes to code, and submit your changes. If during above process you want to work on something else, commit all your code, create a new branch, and work on new branch. 8.2.1.2 PEcAn Project and Github Organization Repository: https://github.com/organizations/PecanProject PEcAn source code: https://github.com/PecanProject/pecan.git BETYdb source code: https://github.com/PecanProject/bety.git These instructions apply to other repositories too. 8.2.1.3 PEcAn Project Branches We follow branch organization laid out on this page. In short, there are three main branches you must be aware of: develop - Main Branch containing the latest code. This is the main branch you will make changes to. master - Branch containing the latest stable code. DO NOT MAKE CHANGES TO THIS BRANCH. release/vX.X.X - Named branches containing code specific to a release. Only make changes to this branch if you are fixing a bug on a release branch. 8.2.1.4 Milestones, Issues, Tasks The Milestones, issues, and tasks can be used to organize specific features or research projects. In general, there is a heirarchy: milestones (Big picture, “Epic”): contains many issues, organized by release. issues (Specific features / bugs, “Story”): may contain a list of tasks; represent task list (to do list, “Tasks”): list of steps required to close an issue, e.g.: * [ ] first do this * [ ] then this * [ ] completed when x and y 8.2.1.5 Editing files on GitHub The easiest approach is to use GitHub’s browser based workflow. This is useful when your change is a few lines, if you are editing a wiki, or if the edit is trivial (and won’t break the code). The GitHub documentation is here but it is simple: finding the page or file you want to edit, click “edit” and then the GitHub web application will automatically forking and branch, then allow you to submit a pull request. However, it should be noted that unless you are a member of the PEcAn project that the “edit” button will not be active and you’ll want to follow the workflow described below for forking and then submitting a pull request. 8.2.2 Recommended Git Workflow Summary: development should occur on a fork of the main repository. Fork Create Branch Develop Push changes to your fork Create pull request from branch on your fork to develop branch on pecanproject/pecan Each feature should be in its own branch (for example each issue is a branch, names of branches are often the issue in a bug tracking system). Commit and Push Frequency On your branch, commit any time that you have done work that you do not want to re-do. Remember, pushing changes to your branch is like saving a draft. Submit a pull request when you are done. 8.2.2.1 Before any work is done The first step below only needs to be done once when you first start working on the PEcAn code. The steps below that need to be done to set up PEcAn on your computer, and would need to be repeated if you move to a new computer. All contributors should create a fork of the PEcAn source code in their own folder see github help: “fork a repo”). This forked repository will allow you to create branches and submit these changes back to GitHub using pull requests to the develop branch of PEcAn. The pull request will start a review process that will eventually result in the code being merged into the main copy of the codebase. See https://help.github.com/articles/fork-a-repo for more information, especially on how to keep your fork up to date with respect to the original. (Rstudio users should also see Git + Rstudio, below). You can setup SSH keys to make it easier to commit cod back to GitHub. This might especially be true if you are working from a cluster, see set up ssh keys There is a script in the scripts folder called scripts/syncgit.sh that will keep your fork in sync with the main pecanproject repository. Introduce yourself to GIT git config --global user.name "FULLNAME" git config --global user.email you@yourdomain.example.com Fork PEcAn on GitHub. Go to the PEcAn source code and click on the Fork button in the upper right. This will create a copy of PEcAn in your personal space. Clone to your local machine via command line git clone git@github.com:<username>/pecan.git Define PEcAnProject/pecan as upstream repository cd pecan git remote add upstream git@github.com:PecanProject/pecan.git 8.2.2.1.1 Hint: Keeping your fork in sync If you have used the instructions above, you can use the helper script called scripts/syncgit.sh to keep the master and develop branches of your own fork in sync with the PEcAnProject/pecan repository. After following the above, your .git/config file will include the following: ... [remote "origin"] url = git@github.com:<username>/pecan.git fetch = +refs/heads/*:refs/remotes/origin/* [branch "develop"] remote = origin merge = refs/heads/develop [remote "upstream"] url = git@github.com:PecanProject/pecan.git fetch = +refs/heads/*:refs/remotes/upstream/* Then, you can run: ./scripts/syncgit.sh Now the master and develop branches on your fork will be up to date. 8.2.2.2 Using Branching Ideally, a developer should create a new branch for each feature or bug fix Make sure you start in the develop branch git checkout develop Make sure develop is up to date git pull upstream develop Run the PEcAn MAKEFILE to compile code from the main directory. make Create a new branch and switch to it git checkout -b <branchname> Work/commit/etc git add <file_that_was_changed.R> git commit -m "<some descriptive information about what was done>" Make sure that code compiles and documentation updated. The make document command will run roxygenise. make document make Push this branch to your github space git push origin <branchname> submit pull request with [[link commits to issues|Using-Git#link-commits-to-issuess]]; also see github documentation 8.2.2.3 After pull request is merged Make sure you start in master git checkout develop` delete branch remotely git push origin --delete <branchname>` delete branch locally git branch -D <branchname>` 8.2.2.4 Link commits to issues You can reference and close issues from comments, pull requests, and commit messages. This should be done when you commit code that is related to or will close/fix an existing issue. There are two ways to do this. One easy way is to include the following text in your commit message: Github to close: “closes gh-xxx” (or syn. close, closed, fixes, fix, fixed) to reference: just the issue number (e.g. “gh-xxx”) 8.2.3 Useful Git tools 8.2.3.1 GitHub Desktop The easiest way to get working with GitHub is by installing the GitHub client. For instructions for your specific OS and download of the GitHub client, see https://help.github.com/articles/set-up-git. This will help you set up an SSH key to push code back to GitHub. To check out a project you do not need to have an ssh key and you can use the https or git url to check out the code. 8.2.3.2 Git + Rstudio Rstudio is nicely integrated with many development tools, including git and GitHub. It is quite easy to check out source code from within the Rstudio program or browser. The Rstudio documentation includes useful overviews of version control and R package development. Once you have git installed on your computer (see the Rstudio version control documentation for instructions), you can use the following steps to install the PEcAn source code in Rstudio. 8.2.4 Advanced 8.2.4.1 Fixing a release Branch If you would like to make changes to a release branch, you must follow a different workflow, as the release branch will not contain the latest code on develop and must remain seperate. Fetch upstream remote branches git fetch upstream Checkout the correct release branch git checkout -b release/vX.Y.Z Compile Code with make make Make changes and commit them git add <changed_file.R> git commit -m \"Describe changes\" Compile and make roxygen changes make make document Commit and push any files that were changed by make document Make a pull request. It is essential that you compare your pull request to the remote release branch, NOT the develop branch. 8.2.4.2 Tags Git supports two types of tags: lightweight and annotated. For more information see the Tagging Chapter in the Git documentation. Lightweight tags are useful, but here we discuss the annotated tags that are used for marking stable versions, major releases, and versions associated with published results. The basic command is git tag. The -a flag means ‘annotated’ and -m is used before a message. Here is an example: git tag -a v0.6 -m \"stable version with foo and bar features, used in the foobar publication by Bob\" Adding a tag to the a remote repository must be done explicitly with a push, e.g. git push v0.6 To use a tagged version, just checkout: git checkout v0.6 To tag an earlier commit, just append the commit SHA to the command, e.g. git tag -a v0.99 -m \"last version before 1.0\" 9fceb02 8.2.5 References 8.2.5.1 Git Documentation Scott Chacon, ‘Pro Git book’, http://git-scm.com/book GitHub help pages, https://help.github.com/ Main GIT page http://git-scm.com/documentation Another set of pages about branching, http://sandofsky.com/blog/git-workflow.html Stackoverflow highest voted questions tagged “git” 8.2.5.2 GitHub Documentation When in doubt, the first step is to click the “Help” button at the top of the page. GitHub Flow by Scott Chacon (Git evangelist and Ruby developer working on GitHub.com) GitHub FAQ Using Pull Requests SSH Keys 8.2.6 GitHub use with PEcAn In this section, development topics are introduced and discussed. PEcAn code lives within the If you are looking for an issue to work on, take a look through issues labled “good first issue”. To get started you will want to review We use GitHub to track development. To learn about GitHub, it is worth taking some time to read through the FAQ. When in doubt, the first step is to click the “Help” button at the top of the page. To address specific people, use a github feature called @mentions e.g. write @dlebauer, @robkooper, @mdietze, or @serbinsh … in the issue to alert the user as described in the GitHub documentation on notifications 8.2.6.1 Bugs, Issues, Features, etc. 8.2.6.2 Reporting a bug (For developers) work through debugging. Once you have identified a problem, that you can not resolve, you can write a bug report Write a bug report submit the bug report If you do find the answer, explain the resolution (in the issue) and close the issue 8.2.6.3 Required content Note: a bug is only a bug if it is reproducible clear bug reports save time Clear, specific title Description - What you did What you expected to happen What actually happened What does work, under what conditions does it fail? Reproduction steps - minimum steps required to reproduce the bug additional materials that could help identify the cause: screen shots stack traces, logs, scripts, output specific code and data / settings / configuration files required to reproduce the bug environment (operating system, browser, hardware) 8.2.6.4 Requesting a feature (from The Pragmatic Programmer, available as ebook through UI libraries, hardcopy on David’s bookshelf) focus on “user stories”, e.g. specific use cases Be as specific as possible, Here is an example: Bob is at www.mysite.edu/maps map of the the region (based on user location, e.g. US, Asia, etc) option to “use current location” is provided, if clicked, map zooms in to, e.g. state or county level for site run: option to select existing site or specify point by lat/lon option to specify a bounding box and grid resolution in either lat/lon or polar stereographic. asked to specify start and end times in terms of year, month, day, hour, minute. Time is recorded in UTC not local time, this should be indicated. 8.2.6.5 Closing an issue Definition of “Done” test documentation when issue is resolved: status is changed to “resolved” assignee is changed to original author if original author agrees that issue has been resolved original author changes status to “closed” except for trivial issues, issues are only closed by the author 8.2.6.6 When to submit an issue? Ideally, non-trivial code changes will be linked to an issue and a commit. This requires creating issues for each task, making small commits, and referencing the issue within your commit message. Issues can be created on GitHub. These issues can be linked to commits by adding text such as fixes gh-5). Rationale: This workflow is a small upfront investment that reduces error and time spent re-creating and debugging errors. Associating issues and commits, makes it easier to identify why a change was made, and potential bugs that could arise when the code is changed. In addition, knowing which issue you are working on clarifies the scope and objectives of your current task. "],["coding-practices.html", "8.3 Coding Practices", " 8.3 Coding Practices 8.3.1 Coding Style Consistent coding style improves readability and reduces errors in shared code. Unless otherwise noted, PEcAn follows the Tidyverse style guide, so please familiarize yourself with it before contributing. In addition, note the following: Document all functions using roxygen2. See Roxygen2 for more details. Put your name on things. Any function that you create or make a meaningful contribution to should have your name listed after the author tag in the function documentation. It is also often a good idea to add your name to extended comments describing particularly complex or strange code. Write unit tests with testthat. Tests are a complement to documentation - they define what a function is (and is not) expected to do. Not all functions necessarily need unit tests, but the more tests we have, the more confident we can be that changes don’t break existing code. Whenever you discover and fix a bug, it is a good idea to write a unit test that makes sure the same bug won’t happen again. See Unit_Testing for instructions, and Advanced R: Tests. Do not use abbreviations. Always write out TRUE and FALSE (i.e. do not use T or F). Do not rely on partial argument matching – write out all arguments in full. Avoid dots in function names. R’s S3 methods system uses dots to denote object methods (e.g. print.matrix is the print method for objects of class matrix), which can cause confusion. Use underscores instead (e.g. do_analysis instead of do.analysis). (NOTE that many old PEcAn functions violate this convention. The plan is to deprecate those in PEcAn 2.0. See GitHub issue #392). Use informative file names with consistent extensions. Standard file extensions are .R for R scripts, .rds for individual objects (via saveRDS function), and .RData (note: capital D!) for multiple objects (via the save function). For function source code, prefer multiple files with fewer functions in each to large files with lots of files (though it may be a good idea to group closely related functions in a single file). File names should match, or at least closely reflect, their files (e.g. function do_analysis should be defined in a file called do_analysis.R). Do not use spaces in file names – use dashes (-) or underscores (_). For using external packages, add the package to Imports: and call the corresponding function with package::function. Do not use @importFrom package function or, worse yet, @import package. (The exception is infix operators like magrittr::%>% or ggplot2::%+%, which can be imported via roxygen2 documentation like @importFrom magrittr %>%). Do not add packages to Depends. In general, try to avoid adding new dependencies (especially ones that depend on system libraries) unless they are necessary or already widely used in PEcAn (e.g. GDAL, NetCDF, XML, JAGS, dplyr). For a more thorough and nuanced discussion, see the package dependencies appendix. "],["dplyr.html", "9 dplyr", " 9 dplyr When running devtools::check() a common warning relating to dplyr code is “no visible binding for global variable ‘x’” In data-masking situations (e.g. mutate(), filter()), you can eliminate this warning by using the .data pronoun. For example, instead of df %>% mutate(newvar = oldvar + 2), use df %>% mutate(newvar = .data$oldvar + 2). In tidy-select situations (e.g. select(), rename()), you can eliminate this warning by using strings instead of naked column names. For example, instead of df %>% select(y) use df %>% select(\"y\"). Using .data inside of select() is deprecated as of tidyselect v1.2.0 9.0.1 Logging During development we often add many print statements to check to see how the code is doing, what is happening, what intermediate results there are etc. When done with the development it would be nice to turn this additional code off, but have the ability to quickly turn it back on if we discover a problem. This is where logging comes into play. Logging allows us to use “rules” to say what information should be shown. For example when I am working on the code to create graphs, I do not have to see any debugging information about the SQL command being sent, however trying to figure out what goes wrong during a SQL statement it would be nice to show the SQL statements without adding any additional code. PEcAn provides a set of logger.* functions that should be used in place of base R’s stop, warn, print, and similar functions. The logger functions make it easier to print to a system log file, and to control the level of output produced by PEcAn. The file test.logger.R provides descriptive examples This query provides an current overview of functions that use logging Logger functions and their corresponding levels (in order of increasing level): logger.debug (\"DEBUG\") – Low-level diagnostic messages that are hidden by default. Good examples of this are expanded file paths and raw results from database queries or other analyses. logger.info (\"INFO\") – Informational messages that regular users may want to see, but which do not indicate anything unexpected. Good examples of this are progress updates updates for long-running processes, or brief summaries of queries or analyses. logger.warn (\"WARN\") – Warning messages about issues that may lead to unexpected but valid results. Good examples of this are interactions between arguments that lead to some arguments being ignored or removal of missing or extreme values. logger.error (\"ERROR\") – Error messages from which PEcAn has some capacity to recover. Unless you have a very good reason, we recommend avoiding this in favor of either logger.severe to actually stop execution or logger.warn to more explicitly indicate that the problem is not fatal. logger.severe – Catastrophic errors that warrant immediate termination of the workflow. This is the only function that actually stops R’s execution (via stop). The logger.setLevel function sets the level at which a message will be printed. For instance, logger.setLevel(\"WARN\") will suppress logger.info and logger.debug messages, but will print logger.warn and logger.error messages. logger.setLevel(\"OFF\") suppresses all logger messages. To print all messages to console, use logger.setUseConsole(TRUE) 9.0.2 Package Data 9.0.2.1 Summary: Files with the following extensions will be read by R as data: plain R code in .R and .r files are sourced using source() text tables in .tab, .txt, .csv files are read using read() ** objects in R image files: .RData, .rda are loaded using load() capitalization matters all objects in foo.RData are loaded into environment pro: easiset way to store objects in R format con: format is application (R) specific Details are in ?data, which is mostly a copy of Data section of Writing R Extensions. 9.0.2.2 Accessing data Data in the [data] directory will be accessed in the following ways, efficient way: (especially for large data sets) using the data function: data(foo) # accesses data with, e.g. load(foo.RData), read(foo.csv), or source(foo.R) easy way: by adding the following line to the package DESCRIPTION: note: this should be used with caution or it can cause difficulty as discussed in redmine issue #1118 LazyData: TRUE From the R help page: Currently, a limited number of data formats can be accessed using the data function by placing one of the following filetypes in a packages’ data directory: * files ending .R or .r are source()d in, with the R working directory changed temporarily to the directory containing the respective file. (data ensures that the utils package is attached, in case it had been run via utils::data.) * files ending .RData or .rda are load()ed. * files ending .tab, .txt or .TXT are read using read.table(..., header = TRUE), and hence result in a data frame. * files ending .csv or .CSV are read using read.table(..., header = TRUE, sep = ';'), and also result in a data frame. If your data does not fall in those 4 categories, or you can use the system.file function to get access to the data: system.file("data", "ed.trait.dictionary.csv", package="PEcAn.utils") [1] "/home/kooper/R/x86_64-pc-linux-gnu-library/2.15/PEcAn.utils/data/ed.trait.dictionary.csv" The arguments are folder, filename(s) and then package. It will return the fully qualified path name to a file in a package, in this case it points to the trait data. This is almost the same as the data function, however we can now use any function to read the file, such as read.csv instead of read.csv2 which seems to be the default of data. This also allows us to store arbitrary files in the data folder, such as the the bug file and load it when we need it. 9.0.2.2.1 Examples of data in PEcAn packages outputs: [/modules/uncertainties/data/output.RData] parameter samples [/modules/uncertainties/data/samples.RData] 9.0.3 Documenting functions using roxygen2 This is the standard method for documenting R functions in PEcAn. For detailed instructions, see one of the following resources: roxygen2 pacakge documentation Formatting overview Markdown formatting Namespaces (e.g. when to use @export) From “R packages” by Hadley Wickham: Object Documentation Package Metadata Below is a complete template for a Roxygen documentation block. Note that roxygen lines start with #': #' Function title, in a few words #' #' Function description, in 2-3 sentences. #' #' (Optional) Package details. #' #' @param argument_1 A description of the argument #' @param argument_2 Another argument to the function #' @return A description of what the function returns. #' #' @author Your name <your_email@email.com> #' @examples #' \\dontrun{ #' # This example will NOT be run by R CMD check. #' # Useful for long-running functions, or functions that #' # depend on files or values that may not be accessible to R CMD check. #' my_function("~/user/my_file") #'} # # This example WILL be run by R CMD check #' my_function(1:10, argument_2 = 5) ## ^^ A few examples of the function's usage #' @export # ^^ Whether or not the function will be "exported" (made available) to the user. # If omitted, the function can only be used inside the package. my_function <- function(argument_1, argument_2) {...} Here is a complete example from the PEcAn.utils::days_in_year() function: #' Number of days in a year #' #' Calculate number of days in a year based on whether it is a leap year or not. #' #' @param year Numeric year (can be a vector) #' @param leap_year Default = TRUE. If set to FALSE will always return 365 #' #' @author Alexey Shiklomanov #' @return integer vector, all either 365 or 366 #' @export #' @examples #' days_in_year(2010) # Not a leap year -- returns 365 #' days_in_year(2012) # Leap year -- returns 366 #' days_in_year(2000:2008) # Function is vectorized over years days_in_year <- function(year, leap_year = TRUE) {...} To update documentation throughout PEcAn, run make document in the PEcAn root directory. Make sure you do this before opening a pull request – PEcAn’s automated testing (Travis) will check if any documentation is out of date and will throw an error like the following if it is: These files were changed by the build process: {...} "],["developer-testing.html", "9.1 Testing", " 9.1 Testing PEcAn uses two different kinds of testing – unit tests and integration tests. 9.1.1 Unit testing Unit tests are short (<1 minute runtime) tests of functionality of specific functions. Ideally, every function should have at least one unit test associated with it. A unit test should be written for each of the following situations: Each bug should get a regression test. The first step in handling a bug is to write code that reproduces the error This code becomes the test most important when error could re-appear essential when error silently produces invalid results Every time a (non-trivial) function is created or edited Write tests that indicate how the function should perform example: expect_equal(sum(1,1), 2) indicates that the sum function should take the sum of its arguments Write tests for cases under which the function should throw an error example: expect_error(sum(\"foo\")) better : expect_error(sum(\"foo\"), \"invalid 'type' (character)\") Any functionality that you would like to protect over the long term. Functionality that is not tested is more likely to be lost. PEcAn uses the testthat package for unit testing. A general overview of is provided in the “Testing” chapter of Hadley Wickham’s book “R packages”. Another useful resource is the testthat package documentation website. See also our testthat appendix. Below is a lightning introduction to unit testing with testthat. Each package’s unit tests live in .R scripts in the folder <package>/tests/testthat. In addition, a testthat-enabled package has a file called <packagename>/tests/testthat.R with the following contents: library(testthat) library(<packagename>) test_check("<packagename>") Tests should be placed in <packagename>/tests/testthat/test-<sourcefilename>.R, and look like the following: context("Mathematical operators") test_that("mathematical operators plus and minus work as expected",{ sum1 <- sum(1, 1) expect_equal(sum1, 2) sum2 <- sum(-1, -1) expect_equal(sum2, -2) expect_equal(sum(1,NA), NA) expect_error(sum("cat")) set.seed(0) expect_equal(sum(matrix(1:100)), sum(data.frame(1:100))) }) test_that("different testing functions work, giving excuse to demonstrate",{ expect_identical(1, 1) expect_identical(numeric(1), integer(1)) expect_equivalent(numeric(1), integer(1)) expect_warning(mean('1')) expect_that(mean('1'), gives_warning("argument is not numeric or logical: returning NA")) expect_warning(mean('1'), "argument is not numeric or logical: returning NA") expect_message(message("a"), "a") }) 9.1.2 Integration testing Integration tests consist of running the PEcAn workflow in full. One way to do integration tests is to manually run workflows for a given version of PEcAn, either through the web interface or by manually creating a pecan.xml file. Such manual tests are an important part of checking PEcAn functionality. Alternatively, the base/workflow/inst/batch_run.R script can be used to quickly run a series of user-specified integration tests without having to create a bunch of XML files. This script is powered by the PEcAn.workflow::create_execute_test_xml() function, which takes as input information about the model, meteorology driver, site ID, run dates, and others, uses these to construct a PEcAn XML file, and then uses the system() command to run a workflow with that XML. If run without arguments, batch_run.R will try to run the model configurations specified in the base/workflow/inst/default_tests.csv file. This file contains a CSV table with the following columns: model – The name of the model (models.model_name column in BETY) revision – The version of the model (models.revision column in BETY) met – The name of the meteorology driver source site_id – The numeric site ID for the model run (sites.site_id) pft – The name of the plant functional type to run. If NA, the script will use the first PFT associated with the model. start_date, end_date – The start and end dates for the model run, respectively. These should be formatted according to ISO standard (YYYY-MM-DD, e.g. 2010-03-16) sensitivity – Whether or not to run the sensitivity analysis. TRUE means run it, FALSE means do not. ensemble_size – The number of ensemble members to run. Set this to 1 to do a single run at the trait median. comment – An string providing some user-friendly information about the run. The batch_run.R script will run a workflow for every row in the input table, sequentially (for now; eventually, it will try to run them in parallel), and at the end of each workflow, will perform some basic checks, including whether or not the workflow finished and if the model produced any output. These results are summarized in a CSV table (by default, a file called test_result_table.csv), with all of the columns as the input test CSV plus the following: outdir – Absolute path to the workflow directory. workflow_complete – Whether or not the PEcAn workflow completed. Note that this is a relatively low bar – PEcAn workflows can complete without having run the model or finished some other steps. has_jobsh – Whether or not PEcAn was able to write the model’s job.sh script. This is a good indication of whether or not the model’s write.configs step was successful, and may be useful for separating model configuration errors from model execution errors. model_output_raw – Whether or not the model produced any output files at all. This is just a check to see of the <workflow>/out directory is empty or not. Note that some models may produce logfiles or similar artifacts as soon as they are executed, whether or not they ran even a single timestep, so this is not an indication of model success. model_output_processed – Whether or not PEcAn was able to post-process any model output. This test just sees if there are any files of the form YYYY.nc (e.g. 1992.nc) in the <workflow>/out directory. Right now, these checks are not particularly robust or comprehensive, but they should be sufficient for catching common errors. Development of more, better tests is ongoing. The batch_run.R script can take the following command-line arguments: --help – Prints a help message about the script’s arguments --dbfiles=<path> – The path to the PEcAn dbfiles folder. The default value is ~/output/dbfiles, based on the file structure of the PEcAn VM. Note that for this and all other paths, if a relative path is given, it is assumed to be relative to the current working directory, i.e. the directory from which the script was called. --table=<path> – Path to an alternate test table. The default is the base/workflow/inst/default_tests.csv file. See preceding paragraph for a description of the format. --userid=<id> – The numeric user ID for registering the workflow. The default value is 99000000002, corresponding to the guest user on the PEcAn VM. --outdir=<path> – Path to a directory (which will be created if it doesn’t exist) for storing the PEcAn workflow outputs. Default is batch_test_output (in the current working directory). --pecandir=<path> – Path to the PEcAn source code root directory. Default is the current working directory. --outfile=<path> – Full path (including file name) of the CSV file summarizing the results of the runs. Default is test_result_table.csv. The format of the output 9.1.3 Continuous Integration Every time anyone commits a change to the PEcAn code, the act of pushing to GitHub triggers an automated build and test of the full PEcAn codebase, and all pull requests must report a successful CI build before they will be merged. This will sometimes feel like a burden when the build breaks on an issue that looks trivial, but anything that breaks the build is important enough to fix. It’s much better to find errors early and fix them before they get incorporated into the released PEcAn code. At this writing PEcAn’s CI builds primarily use GitHub Actions and the rest of this section assumes a GitHub Actions. All our GitHub Actions builds run in a containers using different versions of R in parallel. The build will use the latest pecan/depends container for that specific R version. Each night this depends image is rebuild. Each build starts by launching a separate clean virtual machine for each R version and performs roughly the following actions on all of them: * Compile the source code in the container - Installs all the R packages that are declared as dependencies in any PEcAn package, as computed by scripts/generate_dependencies.R. - This will also check to see if any files have been modified during this step * Run the tests inside the container, and checks to see if they all pass - This will also check to see if any files have been modified during this step * Run the doxygen command inside the container - This will also check to see if any files have been modified during this step * Run the check command inside the container, and checks if there are any new warnings and/or errors - Runs package unit tests (the same ones you run locally with make test or devtools::test(pkgname)). - As discussed in Unit testing, these tests should run quickly and test individual components in relative isolation. - Any test that calls the skip_on_ci function will be skipped. This is useful for tests that need to run for a very long time (e.g. large data product downloads) or require resources that aren’t available on Travis (e.g. specific models), but be sure to run these tests locally before pushing your code! - This will also check to see if any files have been modified during this step - Any ERROR in the check output will stop the build immediately. - If there are no ERRORs, any WARNINGs or NOTEs are compared against a stored historic check result in <package>/tests/Rcheck_reference.log. If the package has no stored reference result, all WARNINGs and NOTEs are considered newly added and reported as build failures. - If all messages from the current built were also present in the reference result, the check passes. If any messages are newly added, a build failure is reported. - Each line of the check log is considered a separate message, and the test requires exact matching, so a change from Undocumented arguments in documentation object 'foo': 'x' to Undocumented arguments in documentation object 'foo': 'x', 'y' will be counted as a new warning… and you should fix both of them while you’re at it! - The idea here is to enforce good coding practice and catch likely errors in all new code while recognizing that we have a lot of legacy code whose warnings need to be fixed as we have time rather than all at once. - As we fix historic warnings, we will revoke their grandfathered status by removing them from the stored check results, so that they will break the build if they reappear. - If your PR reports a failure in pre-existing code that you think ought to be grandfathered, please fix it as part of your PR anyway. It’s frustrating to see tests complain about code you didn’t touch, but the failures all need to be cleaned up eventually and it’s likely easier to fix the error than to figure out how to re-ignore it. * Run a simple integration test using SIPNET model * Create the docker images - Once your PR is merged, it will push them to DockerHub and github container repository. * Compiles the PEcAn documentation book (book_source) and the tutorials (documentation/tutorials) and uploads them to the PEcAn website. - This is only done for commits to the master or develop branch, so changes to in-progress pull requests never change the live documentation until after they are merged. If your build fails and indicates that files have been modified there are a few common causes. It should also list the files that have changes, and what has changed. * The most common cause is that you forgot to Roxygenize before committing. * This step will also detect newly added files, e.g. tests improperly writing to the current working directory rather than tempdir() and then failing to clean up after themselves. If any of the actionsreports an error, the build is marked as “failed”. If they all pass, the GitHub actions marks the build as successful and tells the PR that it’s OK to allow your changes to be merged… but the final decision belongs to the human reviewing your code and they might still ask you for other changes! "],["download-and-compile-pecan.html", "9.2 Download and Compile PEcAn", " 9.2 Download and Compile PEcAn Set R_LIBS_USER CRAN Reference # point R to personal lib folder echo 'export R_LIBS_USER=${HOME}/R/library' >> ~/.profile source ~/.profile mkdir -p ${R_LIBS_USER} 9.2.1 Download, compile and install PEcAn from GitHub # download pecan cd git clone https://github.com/PecanProject/pecan.git # compile pecan cd pecan make For more information on the capabilities of the PEcAn Makefile, check out our section on Updating PEcAn. Following will run a small script to setup some hooks to prevent people from using the pecan demo user account to check in any code. # prevent pecan user from checking in code ./scripts/create-hooks.sh 9.2.2 PEcAn Testrun Do the run, this assumes you have installed the BETY database, sites tar file and SIPNET. # create folder cd mkdir testrun.pecan cd testrun.pecan # copy example of pecan workflow and configuration file cp ../pecan/tests/pecan32.sipnet.xml pecan.xml cp ../pecan/scripts/workflow.R workflow.R # exectute workflow rm -rf pecan ./workflow.R pecan.xml NB: pecan.xml is configured for the virtual machine, you will need to change the field from ‘/home/carya/’ to wherever you installed your ‘sites’, usually $HOME "],["directory-structure.html", "9.3 Directory structure", " 9.3 Directory structure 9.3.1 Overview of PEcAn repository as of PEcAn 1.5.3 pecan/ +- base/ # Core functions +- all # Dummy package to load all PEcAn R packages +- db # Modules for querying the database +- logger # Report warnings without killing workflows +- qaqc # Model skill testing and integration testing +- remote # Communicate with and execute models on local and remote hosts +- settings # Functions to read and manipulate PEcAn settings files +- utils # Misc. utility functions +- visualization # Advanced PEcAn visualization module +- workflow # functions to coordinate analysis steps +- book_source/ # Main documentation and developer's guide +- CHANGELOG.md # Running log of changes in each version of PEcAn +- docker/ # Experimental replacement for PEcAn virtual machine +- documentation # index_vm.html, references, other misc. +- models/ # Wrappers to run models within PEcAn +- ed/ # Wrapper scripts for running ED within PEcAn +- sipnet/ # Wrapper scripts for running SIPNET within PEcAn +- ... # Wrapper scripts for running [...] within PEcAn +- template/ # Sample wrappers to copy and modify when adding a new model +- modules # Core modules +- allometry +- data.atmosphere +- data.hydrology +- data.land +- meta.analysis +- priors +- rtm +- uncertainty +- ... +- scripts # R and Shell scripts for use with PEcAn +- shiny/ # Interactive visualization of model results +- tests/ # Settings files for host-specific integration tests +- web # Main PEcAn website files 9.3.2 Generic R package structure: see the R development wiki for more information on writing code and adding data. +- DESCRIPTION # short description of the PEcAn library +- R/ # location of R source code +- man/ # Documentation (automatically compiled by Roxygen) +- inst/ # files to be installed with package that aren't R functions +- extdata/ # misc. data files (in misc. formats) +- data/ # data used in testing and examples (saved as *.RData or *.rda files) +- NAMESPACE # declaration of package imports and exports (automatically compiled by Roxygen) +- tests/ # PEcAn testing scripts +- testthat/ # nearly all tests should use the testthat framework and live here "],["working-with-vm.html", "10 VM configuration and maintenance ", " 10 VM configuration and maintenance "],["maintain-vm.html", "10.1 Updating the VM", " 10.1 Updating the VM The PEcAn VM is distributed with specific versions of PEcAn compiled. However, you do not need to constantly download the VM in order to update your code and version of BETY. To update and maintain your code you can follow the steps found in the Developer section in Updating PecAn and Code and BETY Database. However, if you are using the latest version of the PEcAn VM (>=1.7) you have the Dockerized version of PEcAn and need to follow the instructions under the DOCKER section to update your PEcAn and BETY containers. "],["ssh-vm.html", "10.2 Connecting to the VM via SSH", " 10.2 Connecting to the VM via SSH Once the VM is running anywhere on your machine, you can connect to it from a separate terminal via SSH as follows: ssh -p 6422 carya@localhost You will be prompted for a password. Like everywhere else in PEcAn, the username is carya and the password is illinois. The same password is used for any system maintenance you wish to do on the VM via sudo. As a shortcut, you can add the following to your ~/.ssh/config file (or create one if it does not exist). Host pecan-vm Hostname localhost Port 6422 user carya ForwardX11Trusted yes This will allow you to SSH into the VM with the simplified command, ssh pecan-vm. "],["ssh-vm-bety.html", "10.3 Connecting to BETYdb on the VM via SSH", " 10.3 Connecting to BETYdb on the VM via SSH Sometimes, you may want to develop code locally but connect to an instance of BETYdb on the VM. To do this, first open a new terminal and connect to the VM while enabling port forwarding (with the -L flag) and setting the port number. Using 5433 does not conflict with the postgres default port of 5432, the forwarded port will not conflict with a postgres database server running locally. ssh -L 5433:localhost:5432 carya@localhost:6422 This makes port 5433 on the local machine match port 5432 on the VM. This means that connecting to localhost:5433 will give you access to BETYdb on the VM. To test this on the command line, try the following command, which, if successful, will drop you into the psql console. psql -d bety -U bety -h localhost -p 5433 To test this in R, open a Postgres using the analogous parameters: library(DBI) library(RPostgres) con <- dbConnect( drv = Postgres(), user = "bety", password = "bety", dbname = "bety", host = "localhost", port = 5433 ) dbListTables(con) # This should return a vector of bety tables Note that the same general approach will work on any BETYdb server where port forwarding is enabled, but it requires ssh access. 10.3.1 Using Amazon Web Services for a VM (AWS) Login to Amazon Web Services (AWS) and select the EC2 Dashboard. If this is your first time using AWS you will need to set up an account before you are able to access the EC2 Dashboard. Important: You will need a credit card number and access to a phone to be able to verify AWS account registration. AWS is free for one year. Choose AMI On the top right next to your name, make sure the location setting is on U.S. East (N. Virginia), not U.S. West (Oregon) On the left click, click on EC2 (Virtual servers), then click on “AMIs”, also on the left In the search window toggle to change “Owned by me” to “Public images” Type “pecan” into the search window Click on the toggle button on the left next to PEcAn1.4.6 Click on the “Launch” button at the top Choose an Instance Type Select what type of machine you want to run. For this demo the default, t2.micro, will be adequate. Be aware that different machine types incur very different costs, from 1.3 cents/hour to over $5/hr https://aws.amazon.com/ec2/pricing/ Select t2.micro, then click “Next: Configure Instance Details” Configure Instance Details The defaults are OK. Click “Next: Add Storage” Add Storage The defaults are OK. Click “Next: Tag Instance” Tag Instance You can name your instance if you want. Click “Next: Configure Security Group” Configure Security Group You will need to add two new rules: Click “Add Rule” then select “HTTP” from the pull down menu. This rule allows you to access the webserver on PEcAn. Click “Add Rule”, leave the pull down on “Custom TCP Rule”, and then change the Port Range from 0 to 8787. Set “Source” to Anywhere. This rule allows you to access RStudio Server on PEcAn. Click “Review and Launch” . You will then see this pop-up: Select the default drive volume type and click Next Review and Launch Review the settings and then click “Launch”, which will pop up a select/create Key Pair window. Key Pair Select “Create a new key pair” and give it a name. You won’t actually need this key unless you need to SSH into your PEcAn server, but AWS requires you to create one. Click on “Download Key Pair” then on “Launch Instances”. Next click on “View Instances” at the bottom of the following page. Instances You will see the status of your PEcAn VM, which will take a minute to boot up. Wait until the Instance State reads “running”. The most important piece of information here is the Public IP, which is the URL you will need in order to access your PEcAn instance from within your web browser (see Demo 1 below). Be aware that it often takes ~1 hr for AWS instances to become fully operational, so if you get an error when you put the Public IP in you web browser, most of the time you just need to wait a bit longer. Congratulations! You just started a PEcAn server in the “cloud”! When you are done using PEcAn, you will want to return to the “Instances” menu to turn off your VM. To STOP the instance (which will turn the machine off but keep your work), select your PEcAn instance and click Actions > Instance state > Stop. Be aware that a stopped instance will still accrue a small storage cost on AWS. To restart this instance at any point in the future you do not want to repeat all the steps above, but instead you just need to select your instance and then click Actions > Instance state > Start To TERMINATE the instance (which will DELETE your PEcAn machine), select your instance and click Actions > Instance state > Terminate. Terminated instances will not incur costs. In most cases you will also want to go to the Volumes menu and delete the storage associated with your PEcAn VM.Remember, AWS is free for one year, but will automatically charge a fee in second year if account is not cancelled. 10.3.2 Creating a Virtual Machine First create virtual machine # ---------------------------------------------------------------------- # CREATE VM USING FOLLOWING: # - VM NAME = PEcAn # - CPU = 2 # - MEMORY = 2GB # - DISK = 100GB # - HOSTNAME = pecan # - FULLNAME = PEcAn Demo User # - USERNAME = xxxxxxx # - PASSWORD = yyyyyyy # - PACKAGE = openssh # ---------------------------------------------------------------------- To enable tunnels run the following on the host machine: VBoxManage modifyvm "PEcAn" --natpf1 "ssh,tcp,,6422,,22" VBoxManage modifyvm "PEcAn" --natpf1 "www,tcp,,6480,,80" Make sure machine is up to date. UBUNTU sudo apt-get update sudo apt-get -y dist-upgrade sudo reboot CENTOS/REDHAT sudo yum -y update sudo reboot Install compiler and other packages needed and install the tools. UBUNTU sudo apt-get -y install build-essential linux-headers-server dkms CENTOS/REDHAT sudo yum -y groupinstall "Development Tools" sudo yum -y install wget Install Virtual Box additions for better integration sudo mount /dev/cdrom /mnt sudo /mnt/VBoxLinuxAdditions.run sudo umount /mnt sudo usermod -a -G vboxsf carya Finishing up the machine Add a message to the login: sudo -s export PORT=$( hostname | sed 's/pecan//' ) cat > /etc/motd << EOF PEcAn version 1.4.3 For more information about: Pecan - http://pecanproject.org BETY - http://www.betydb.org For a list of all models currently navigate [here](../users_guide/basic_users_guide/models_table.md) You can access this system using a webbrowser at http://<hosting machine>:${PORT}80/ or using SSH at ssh -l carya -p ${PORT}22 <hosting machine> where <hosting machine> is the machine where the VM runs on. EOF exit Finishing up Script to clean the VM and remove as much as possible history cleanvm.sh wget -O ~/cleanvm.sh http://isda.ncsa.uiuc.edu/~kooper/EBI/cleanvm.sh chmod 755 ~/cleanvm.sh Make sure machine has SSH keys rc.local sudo wget -O /etc/rc.local http://isda.ncsa.illinois.edu/~kooper/EBI/rc.local Change the resolution of the console sudo sed -i -e 's/#GRUB_GFXMODE=640x480/GRUB_GFXMODE=1024x768/' /etc/default/grub sudo update-grub Once all done, stop the virtual machine history -c && ${HOME}/cleanvm.sh "],["pecan-standards.html", "11 PEcAn standard formats ", " 11 PEcAn standard formats "],["defining-new-input-formats.html", "11.1 Defining new input formats", " 11.1 Defining new input formats New formats can be defined on the ‘formats’ page of BETYdb After creating a new format, the contents should be defined by specifying the BETYdb variable name and the name used in the file/ "],["time-standard.html", "11.2 Time Standard", " 11.2 Time Standard Internal PEcAn standard time follows ISO_8601 format for dates and time (https://en.wikipedia.org/wiki/ISO_8601). For example ordinal dates go from 1 365/366 (https://en.wikipedia.org/wiki/ISO_8601#Ordinal_dates). However, time used in met drivers or model outputs follows CF convention with julian dates following the 0 to 364/365 format To aid in the conversion between PEcAn internal ISO_8601 standard and CF convention used in all met drivers and PEcAn standard output you can utilize the functions: “cf2datetime”,“datetime2doy”,“cf2doy”, and for SIPNET “sipnet2datetime” 11.2.1 Input Standards 11.2.1.1 Meterology Standards 11.2.1.1.1 Dimensions: CF standard-name units time days since 1700-01-01 00:00:00 UTC longitude degrees_east latitude degrees_north General Note: dates in the database should be date-time (preferably with timezone), and datetime passed around in PEcAn should be of type POSIXct. 11.2.1.1.2 The variable names should be standard_name preferred variables indicated in bold wind_direction has no CF equivalent and should not be converted, instead the met2CF functions should convert wind_direction and wind_speed to eastward_wind and northward_wind standard_name is CF-convention standard names units can be converted by udunits, so these can vary (e.g. the time denominator may change with time frequency of inputs) soil moisture for the full column, rather than a layer, is soil_moisture_content A full list of PEcAn standard variable names, units and dimensions can be found here: https://github.com/PecanProject/pecan/blob/develop/base/utils/data/standard_vars.csv For example, in the MsTMIP-CRUNCEP data, the variable rain should be precipitation_rate. We want to standardize the units as well as part of the met2CF.<product> step. I believe we want to use the CF “canonical” units but retain the MsTMIP units any time CF is ambiguous about the units. The key is to process each type of met data (site, reanalysis, forecast, climate scenario, etc) to the exact same standard. This way every operation after that (extract, gap fill, downscale, convert to a model, etc) will always have the exact same inputs. This will make everything else much simpler to code and allow us to avoid a lot of unnecessary data checking, tests, etc being repeated in every downstream function. 11.2.2 Soils and Vegetation Inputs 11.2.2.0.1 Soil Data Check out the Soil Data section on more into on creating a standard soil data file. 11.2.2.0.2 Vegetation Data Check Out the Vegetation Data section on more info on creating a standard vegetation data file 11.2.3 Output Standards created by model2netcdf functions based on format used by MsTMIP Can be seen at HERE We originally used the MsTMIP conventions. Since then, we’ve added the PaLEON variable conventions to our standard as well. If a variable isn’t in one of those two, we stick to the CF conventions. "],["pecanXML.html", "12 The PEcAn XML", " 12 The PEcAn XML The PEcAn system is configured using a XML file, often called pecan.xml. It contains the following major sections (“nodes”): Core configuration Top level structure info – Run metadata outdir – Output directory database – PEcAn database settings pft – Plant functional type selection meta.analysis – Trait meta analysis model – Model configuration run – Run setup host – Host information for remote execution Advanced features ensemble – Ensemble runs sensitivity.analysis – Sensitivity analysis parameter.data.assimilation – Parameter data assimilation multi.settings – Multi Site Settings (experimental) state.data.assimilation – State data assimilation (experimental) browndog – Brown Dog configuration (experimental) benchmarking – Benchmarking remote_process – Remote data module A basic example looks like this: <?xml version="1.0" encoding="UTF-8"?> <pecan> <info> <notes>Example run</notes> <userid>-1</userid> <username>guestuser</username> <date>2018/09/18 19:12:28 +0000</date> </info> <outdir>/data/workflows/PEcAn_99000000006</outdir> <database> <bety> <user>bety</user> <password>bety</password> <host>postgres</host> <dbname>bety</dbname> <driver>PostgreSQL</driver> <write>true</write> </bety> <dbfiles>/data/dbfiles</dbfiles> </database> <pfts> <pft> <name>tundra.grasses</name> <constants> <num>1</num> </constants> </pft> </pfts> <meta.analysis> <iter>3000</iter> <random.effects> <on>FALSE</on> <use_ghs>TRUE</use_ghs> </random.effects> </meta.analysis> <ensemble> <size>1</size> <variable>NPP</variable> <samplingspace> <parameters> <method>uniform</method> </parameters> <met> <method>sampling</method> </met> </samplingspace> </ensemble> <model> <id>5000000002</id> </model> <workflow> <id>99000000006</id> </workflow> <run> <site> <id>1000000098</id> <met.start>2004/01/01</met.start> <met.end>2004/12/31</met.end> </site> <inputs> <met> <source>CRUNCEP</source> <output>SIPNET</output> </met> </inputs> <start.date>2004/01/01</start.date> <end.date>2004/12/31</end.date> </run> <host> <name>localhost</name> <rabbitmq> <uri>amqp://guest:guest@rabbitmq:5672/%2F</uri> <queue>SIPNET_136</queue> </rabbitmq> </host> </pecan> In the following sections, we step through each of these sections in detail. "],["xml-core-config.html", "12.1 Core configuration", " 12.1 Core configuration 12.1.1 Top-level structure The first line of the XML file should contain version and encoding information. <?xml version="1.0" encoding="UTF-8"?> The rest of the XML file should be surrounded by <pecan>...</pecan> tags. <pecan> ...XML body here... </pecan> 12.1.2 info: Run metadata This section contains run metadata. This information is not essential to a successful model run, but is useful for tracking run provenance. <info> <notes>Example run</notes> <userid>-1</userid> <username>guestuser</username> <date>2018/09/18 19:12:28 +0000</date> </info> The <notes> tag will be filled in by the web GUI if you provide notes, or you can add notes yourself within these tags. We suggest adding notes that help identify your run and a brief description of what the run is for. Because these notes are searchable within the PEcAn database and web interface, they can be a useful way to distinguish between similar runs. The <userid> and <username> section is filled in from the GUI if you are signed in. If you are not using the GUI, add the user name and ID you are associated with that exists within the PEcAn database. The <date></date> tag is filled automatically at the time of your run from the GUI. If you are not using the GUI, add the date you execute the run. This tag is not the tag for the dates you would like to run your model simulation. 12.1.3 outdir: Output directory The <outdir> tag is used to configure the output folder used by PEcAn. This is the directory where all model input and output files will be stored. By default, the web interface names this folder PEcAn_<workflow ID>, and higher-level location is set by the $output_folder$ variable in the web/config.php file. If no outdir is specified, PEcAn defaults to the working directory from which it is called, which may be counterintuitive. <outdir>/data/workflows/PEcAn_99000000006</outdir> 12.1.4 database: PEcAn database settings 12.1.4.1 bety: PEcAn database (Bety) configuration The bety tag defines the driver to use to connect to the database (we support PostgreSQL, which is the default, and Postgres) and parameters required to connect to the database. Note that connection parameters are passed exactly as entered to the underlying R database driver, and any invalid or extra parameters will result in an error. In other words, this configuration… <database> ... <bety> <user>bety</user> <password>bety</password> <host>postgres</host> <dbname>bety</dbname> <driver>PostgreSQL</driver> <port>5432</port> <write>true</write> </bety> ... </database> …will be translated (by way of PEcAn.DB::db.open(settings$database$bety)) into R code like the following: con <- DBI::dbConnect( drv = RPostgreSQL::PostgreSQL(), user = "bety", password = "bety", dbname = "bety", host = "postgres", port = "5432", write = TRUE ) Common parameters are described as follows: driver: The driver to use to connect to the database. This should always be set to PostgreSQL, unless you absolutely know what you’re doing. dbname: The name of the database (formerly name), corresponding to the -d argument to psql. In most cases, this should be set to bety, and will only be different if you named your Bety instance something else (e.g. if you have multiple instances running at once). If unset, it will default to the user name of the current user, which is usually wrong! user: The username to connect to the database (formerly userid), corresponding to the -U argument to psql. default value is the username of the current user logged in (PostgreSQL uses user for this field). password: The password to connect to the database (was passwd), corresponding to the -p argument to psql. If unspecified, no password is used. On standard PEcAn installations, the username and password are both bety (all lowercase). host: The hostname of the bety database, corresponding to the -h argument to psql. On the VM, this will be localhost (the default). If using docker, this will be the name of the PostgreSQL container, which is postgres if using our standard docker-compose. If connecting to the PEcAn database on a remote server (e.g. psql-pecan.bu.edu), this should be the same as the hostname used for ssh access. write: Logical. If true (the default), write results to the database. If false, PEcAn will run but will not store any information to bety. When using the web interface, this section is configured by the web/config.php file. The default config.php settings on any given platform (VM, Docker, etc.) or in example files (e.g. config.php.example) are a good place to get default values for these fields if writing pecan.xml by hand. Key R functions using these parameters are as follows: PEcAn.DB::db.open – Open a database connection and create a connection object, which is used by many other functions for communicating with the PEcAn database. 12.1.4.2 dbfiles: Location of database files The dbfiles is a path to the local location of files needed to run models using PEcAn, including model executables and inputs. <database> ... <dbfiles>/data/dbfiles</dbfiles> ... </database> 12.1.4.3 (Experimental) fia: FIA database connection parameters If a version of the FIA database is available, it can be configured using <fia> node, whose syntax is identical to that of the <bety> node. <database> ... <fia> <dbname>fia5data</dbname> <username>bety</username> <password>bety</password> <host>localhost</host> </fia> ... </database> Currently, this is only used for extraction of specific site vegetation information (notably, for ED2 css, pss, and site files). Stability not ensured as of 1.5.3. 12.1.5 pft: Plant functional type selection The PEcAn system requires at least 1 plant functional type (PFT) to be specified inside the <pfts> section. <pfts> <pft> <name>tundra.grasses</name> <constants> <num>1</num> </constants> <posterior.files>Path to a post.distns.*.Rdata or prior.distns.Rdata</posterior.files> </pft> </pfts> name : (required) the name of the PFT, which must exactly match the name in the PEcAn database. outdir: (optional) Directory path in which PFT-specific output will be stored during meta-analysis and sensitivity analysis. If not specified (recommended), it will be written into <outdir>/<pftname>. contants: (optional) this section contains information that will be written directly into the model specific configuration files. For example, some models like ED2 use PFT numbers instead of names for PFTs, and those numbers can be specified here. See documentation for model-specific code for details. posterior.files (Optional) this tag helps to signal write.config functions to use specific posterior/prior files (such as HPDA or MA analysis) for generating samples without needing to access to the bety database. `` This information is currently used by the following PEcAn workflow function: get.traits - ?????? 12.1.6 meta.analysis: Trait Meta Analysis The section meta.analysis needs to exists for a meta.analysis to be executed, even though all tags inside are optional. Conversely, if you do not want to do a trait meta-analysis (e.g. if you want to manually set all parameters), you should omit this node. <meta.analysis> <iter>3000</iter> <random.effects> <on>FALSE</on> <use_ghs>TRUE</use_ghs> </random.effects> </meta.analysis> Some of the tags that can go in this section are: iter: MCMC (Markov Chain Monte Carlo) chain length, i.e. the total number of posterior samples in the meta-analysis, default is 3000. Smaller numbers will run faster but produce larger errors. random.effects: Settings related to whether to include random effects (site, treatment) in meta-analysis model. on: Default is set to FALSE to work around convergence problems caused by an over parameterized model (e.g. too many sites, not enough data). Can be turned to TRUE for including hierarchical random effects. use_ghs: Default is set to TRUE to include greenhouse measurements. Can be set to FALSE to exclude cases where all data is from greenhouse. update: Should previous results of meta.analysis and get.traits be re-used. If set to TRUE the meta-analysis and get.trait.data will always be executed. Setting this to FALSE will try and reuse existing results. Future versions will allow for AUTO as well which will try and reuse if the PFT/traits have not changed. The default value is FALSE. threshold: threshold for Gelman-Rubin convergence diagnostic (MGPRF); default is 1.2. This information is currently used by the following PEcAn workflow function: PEcAn.MA::run.meta.analysis - ??? 12.1.7 model: Model configuration This section describes which model PEcAn should run and some instructions for how to run it. <model> <id>7</id> <type>ED2</type> <binary>/usr/local/bin/ed2.r82</binary> <prerun>module load hdf5</prerun> <config.header> <!--...xml code passed directly to config file...--> </config.header> </model> Some important tags are as follows: id – The unique numeric ID of the model in the PEcAn database models table. If this is present, then type and binary are optional since they can be determined from the PEcAn database. type – The model “type”, matching the PEcAn database modeltypes table name column. This also refers to which PEcAn model-specific package will be used. In PEcAn, a “model” refers to a specific version (e.g. release, git commit) of a specific model, and “model type” is used to link different releases of the same model. Model “types” also have specific PFT definitions and other requirements associated with them (e.g. the ED2 model “type” requires a global land cover database). binary – The file path to the model executable. If omitted, PEcAn will use whatever path is registered in the PEcAn database for the current machine. prerun – Additional options added to the job.sh script, which is used to execute the model. This is useful for setting specific environment variables, load modules, etc. This information is currently used by the following PEcAn workflow function: PEcAn.<MODEL>::write.config.<MODEL> – Write model-specific configuration files PEcAn.workflow::start_model_runs – Begin model run execution 12.1.7.1 Model-specific configuration See the following: ED2 Configuration SIPNET Configuration BIOCRO Configuration 12.1.7.2 ED2 specific tags Following variables are ED specific and are used in the ED2 Configuration. Starting at 1.3.7 the tags for inputs have moved to <run><inputs>. This includes, veg, soil, psscss, inputs. <edin>/home/carya/runs/PEcAn_4/ED2IN.template</edin> <config.header> <radiation> <lai_min>0.01</lai_min> </radiation> <ed_misc> <output_month>12</output_month> </ed_misc> </config.header> <phenol.scheme>0</phenol.scheme> edin : [required] template used to write ED2IN file veg : OBSOLETE [required] location of VEG database, now part of <run><inputs> soil : OBSOLETE [required] location of soild database, now part of <run><inputs> psscss : OBSOLETE [required] location of site inforation, now part of <run><inputs>. Should be specified as <pss>, <css> and <site>. inputs : OBSOLETE [required] location of additional input files (e.g. data assimilation data), now part of <run><inputs>. Should be specified as <lu> and <thsums>. 12.1.8 run: Run Setup This section provides detailed configuration for the model run, including the site and time period for the simulation and what input files will be used. <run> <site> <id>1000000098</id> <met.start>2004/01/01</met.start> <met.end>2004/12/31</met.end> <site.pft> <pft.name>temperate.needleleaf.evergreen</pft.name> <pft.name>temperate.needleleaf.evergreen.test</pft.ame> </site.pft> </site> <inputs> <met> <source>CRUNCEP</source> <output>SIPNET</output> </met> <poolinitcond> <source>NEON_veg</source> <output>poolinitcond</output> <ensemble>100</ensemble> <startdate>2019-01-01</startdate> <enddate>2019-12-31</enddate> <storedir>/projectnb/dietzelab/neon_store</storedir> </poolinitcond> </inputs> <start.date>2004/01/01</start.date> <end.date>2004/12/31</end.date> <stop_on_error>TRUE</stop_on_error> </run> 12.1.8.1 site: Where to run the model This contains the following tags: id – This is the numeric ID of the site in the PEcAn database (table sites, column id). PEcAn can automatically fill in other relevant information for the site (e.g. name, lat, lon) using the site ID, so those fields are optional if ID is provided. name – The name of the site, as a string. lat, lon – The latitude and longitude coordinates of the site, as decimals. met.start, met.end – ??? <site.pft> (optional) If this tag is found under the site tag, then PEcAn automatically makes sure that only PFTs defined under this tag is used for generating parameter’s samples. Following shows an example of how this tag can be added to the PEcAn xml : <site.pft> <pft.name>temperate.needleleaf.evergreen</pft.name> <pft.name>temperate.needleleaf.evergreen</pft.name> </site.pft> For multi-site runs if the pft.site tag (see {#xml-run-inputs}) is defined under input, then the above process will be done automatically under prepare settings step in PEcAn main workflow and there is no need for adding the tags manually. Using the pft.site tag however, requires a lookup table as an input (see {#xml-run-inputs}). 12.1.8.2 inputs: Model inputs Models require several different types of inputs to run. Exact requirements differ from model to model, but common inputs include meteorological/climate drivers, site initial conditions (e.g. vegetation composition, carbon pools), and land use drivers. In general, all inputs should have the following tags: id: Numeric ID of the input in the PEcAn database (table inputs, column id). If not specified, PEcAn will try to figure this out based on the source tag (described below). path: The file path of the input. Usually, PEcAn will set this automatically based on the id (which, in turn, is determined from the source). However, this can be set manually for data that PEcAn does not know about (e.g. data that you have processed yourself and have not registered with the PEcAn database). source: The input data type. This tag name needs to match the names in the corresponding conversion functions. If you are using PEcAn’s automatic input processing, this is the only field you need to set. However, this field is ignored if id and/or path are provided. output: ??? The following are the most common types of inputs, along with their corresponding tags: 12.1.8.2.1 met: Meteorological inputs (Under construction. See the PEcAn.data.atmosphere package, located in modules/data.atmosphere, for more details.) 12.1.8.2.2 (Experimental) soil: Soil inputs (Under construction. See the PEcAn.data.land package, located in modules/data.land, for more details). 12.1.8.2.3 (Experimental) veg: Vegetation initial conditions 12.1.8.2.4 poolinitcond: initial condition inputs source: Data source of initial condition .rd veg files ex: NEON_veg, FIA. output: This tag can only must match the cooresponding tag in the modeltypes_formats table that matches with your model type ex for SIPNET model tag is poolinitcond. ensemble: Number of initial conditions ensemble member files ex: 30. (Under construction. Follow developments in the PEcAn.data.land package, located in modules/data.land in the source code). 12.1.8.2.5 pft.site Multi-site site / PFT mapping When performing multi-site runs, it is not uncommon to find that different sites need to be run with different PFTs, rather than running all PFTs at all sites. If you’re interested to use a specific PFT for your site/sites you can use the following tag to tell PEcAn which PFT needs to be used for what site. <pft.site> <path>site_pft.csv</path> </pft.site> For example using the above tag, user needs to have a csv file named site_pft stored in the pecan folder. At the moment we have functions supporting just the .csv and .txt files which are comma separated and have the following format: site_id, pft_name 1000025731,temperate.broadleaf.deciduous 764,temperate.broadleaf.deciduous Then pecan would use this lookup table to inform write.ensemble.config function about what PFTs need to be used for what sites. 12.1.8.3 start.date and end.date The start and end date for the run, in a format parseable by R (e.g. YYYY/MM/DD or YYYY-MM-DD). These dates are inclusive; in other words, they refer to the first and last days of the run, respectively. NOTE: Any time-series inputs (e.g. meteorology drivers) must contain all of these dates. PEcAn tries to detect and throw informative errors when dates are out of bounds inputs, but it may not know about some edge cases. 12.1.8.4 Other tags The following tags are optional run settings that apply to any model: jobtemplate: the template used when creating a job.sh file, which is used to launch the actual model. Each model has its own default template in the inst folder of the corresponding R package (for instance, here is the one for ED2). The following variables can be used: @SITE_LAT@, @SITE_LON@, @SITE_MET@, @START_DATE@, @END_DATE@, @OUTDIR@, @RUNDIR@ which all come variables in the pecan.xml file. The following two command can be used to copy and clean the results from a scratch folder (specified as scratch in the run section below, for example local disk vs network disk) : @SCRATCH_COPY@, @SCRATCH_CLEAR@. stop_on_error: (logical) Whether the workflow should immediately terminate if any of the model runs fail. If unset, this defaults to TRUE unless you are running an ensemble simulation (and ensemble size is greater than 1). Some models also have model-specific tags, which are described in the PEcAn Models section. 12.1.9 host: Host information for remote execution This section provides settings for remote model execution, i.e. any execution that happens on a machine (including “virtual” machines, like Docker containers) different from the one on which the main PEcAn workflow is running. A common use case for this section is to submit model runs as jobs to a high-performance computing cluster. If no host tag is provided, PEcAn assumes models are run on localhost, a.k.a. the same machine as PEcAn itself. For detailed instructions on remote execution, see the Remote Execution page. For detailed information on configuring this for RabbitMQ, see the RabbitMQ page. The following provides a quick overview of XML tags related to remote execution. NOTE: Any paths specified in the pecan.xml refer to paths on the host specified in this section, /not/ the machine on which PEcAn is running (unless models are running on localhost or this section is omitted). <host> <name>pecan2.bu.edu</name> <rundir>/fs/data3/guestuser/pecan/testworkflow/run</rundir> <outdir>/fs/data3/guestuser/pecan/testworkflow/out</outdir> <scratchdir>/tmp/carya</scratchdir> <clearscratch>TRUE</clearscratch> <qsub>qsub -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash</qsub> <qsub.jobid>Your job ([0-9]+) .*</qsub.jobid> <qstat>'qstat -j @JOBID@ &> /dev/null || echo DONE'</qstat> <prerun>module load udunits R/R-3.0.0_gnu-4.4.6</prerun> <cdosetup>module load cdo/2.0.6</cdosetup> <modellauncher> <binary>/usr/local/bin/modellauncher</binary> <qsub.extra>-pe omp 20</qsub.extra> </modellauncher> </host> The host section has the following tags: name: [optional] name of host server where model is located and executed, if not specified localhost is assumed. folder: [required] The location to store files on the remote machine. For localhost this is optional (<outdir> is the default), for any other host this is required. rundir: [optional] location where all the configuration files are written. If not specified, this will be generated starting at the path specified in folder. outdir: [optional] location where all the outputs of the model are written. If not specified, this will be generated starting at the path specified in folder. scratchdir: [optional] location where output is written. If specified the output from the model is written to this folder and copied to the outdir when the model is finished, this could significantly speed up the model execution (by using local or ram disk). clearscratch: [optional] if set to TRUE the scratchfolder is cleaned up after copying the results to the outdir, otherwise the folder will be left. The default is to clean up after copying. qsub: [optional] the command to submit a job to the queuing system. There are 3 parameters you can use when specifying the qsub command, you can add additional values for your specific setup (for example -l walltime to specify the walltime, etc). You can specify @NAME@ the pretty name, @STDOUT@ where to write stdout and @STDERR@, where to write stderr. You can specify an empty element (<qsub/>) in which case it will use the default value is qsub -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -s /bin/bash. qsub.jobid: [optional] the regular expression used to find the jobid returned from qsub. If not specified (and qsub is) it will use the default value is Your job ([0-9]+) .* qstat: [optional] the command to execute to check if a job is finished, this should return DONE if the job is finished. There is one parameter this command should take @JOBID@ which is the ID of the job as returned by qsub.jobid. If not specified (and qsub is) it will use the default value is qstat -j @JOBID@ || echo DONE prerun: [optional] additional options to add to the job.sh at the top. cdosetup: [optional] additional options to add to the job.sh at the top, specifically work for the CDO Linux command line package, allowing the user to generate the full year netCDF file through model2netcdf.SIPNET function (there has been an issue while this function will iteratively overlap the previous netCDF file, resulting in partial data loss). modellauncher: [optional] this is an experimental section that will allow you to submit all the runs as a single job to a HPC system. The modellauncher section if specified will group all runs together and only submit a single job to the HPC cluster. This single job will leverage of a MPI program that will execute a single run. Some HPC systems will place a limit on the number of jobs that can be executed in parallel, this will only submit a single job (using multiple nodes). In case there is no limit on the number of jobs, a single PEcAn run could potentially submit a lot of jobs resulting in the full cluster running jobs for a single PEcAn run, preventing others from executing on the cluster. The modellauncher has 3 arguments: * binary : [required] The full path to the binary modellauncher. Source code for this file can be found in pecan/contrib/modellauncher](https://github.com/PecanProject/pecan/tree/develop/contrib/modellauncher). * qsub.extra : [optional] Additional flags to pass to qsub besides those specified in the qsub tag in host. This option can be used to specify that the MPI environment needs to be used and the number of nodes that should be used. * mpirun: [optional] Additional commands to be added to the front of launcher.sh. Default is mpirun <path to binary> <path to joblist.txt>. Edit this to, for example, load necessary modules to run mpirun. "],["xml-advanced.html", "12.2 Advanced features", " 12.2 Advanced features 12.2.1 ensemble: Ensemble Runs As with meta.analysis, if this section is missing, then PEcAn will not do an ensemble analysis. <ensemble> <size>1</size> <variable>NPP</variable> <samplingspace> <parameters> <method>uniform</method> </parameters> <met> <method>sampling</method> </met> </samplingspace> </ensemble> An alternative configuration is as follows: <ensemble> <size>5</size> <variable>GPP</variable> <start.year>1995</start.year> <end.year>1999</end.year> <samplingspace> <parameters> <method>lhc</method> </parameters> <met> <method>sampling</method> </met> </samplingspace> </ensemble> Tags in this block can be broken down into two categories: Those used for setup (which determine how the ensemble analysis runs) and those used for post-hoc analysis and visualization (i.e. which do not affect how the ensemble is generated). Tags related to ensemble setup are: size : (required) the number of runs in the ensemble. samplingspace: (optional) Contains tags for defining how the ensembles will be generated. Each piece in the sampling space can potentially have a method tag and a parent tag. Method refers to the sampling method and parent refers to the cases where we need to link the samples of two components. When no tag is defined for one component, one sample will be generated and used for all the ensembles. This allows for partitioning/studying different sources of uncertainties. For example, if no met tag is defined then, one met path will be used for all the ensembles and as a result the output uncertainty will come from the variability in the parameters. At the moment no sampling method is implemented for soil and vegetation. Available sampling methods for parameters can be found in the documentation of the PEcAn.utils::get.ensemble.samples function. For the cases where we need simulations with a predefined set of parameters, met and initial condition we can use the restart argument. Restart needs to be a list with name tags of runid, inputs, new.params (parameters), new.state (initial condition), ensemble.id (ensemble ids), start.time, and stop.time. The restart functionality is developed using model specific functions by called write_restart.modelname. You need to make sure first that this function is already exist for your desired model. Note: if the ensemble size is set to 1, PEcAn will select the posterior median parameter values rather than taking a single random draw from the posterior Tags related to post-hoc analysis and visualization are: variable: (optional) name of one (or more) variables the analysis should be run for. If not specified, sensitivity.analysis variable is used, otherwise default is GPP (Gross Primary Productivity). (NOTE: This static visualization functionality will soon be deprecated as PEcAn moves towards interactive visualization tools based on Shiny and htmlwidgets). This information is currently used by the following PEcAn workflow functions: PEcAn.<MODEL>::write.config.<MODEL> - See above. PEcAn.uncertainty::write.ensemble.configs - Write configuration files for ensemble analysis PEcAn.uncertainty::run.ensemble.analysis - Run ensemble analysis 12.2.2 sensitivity.analysis: Sensitivity analysis Only if this section is defined a sensitivity analysis is done. This section will have <quantile> or <sigma> nodes. If neither are given, the default is to use the median +/- [1 2 3] x sigma (e.g. the 0.00135 0.0228 0.159 0.5 0.841 0.977 0.999 quantiles); If the 0.5 (median) quantile is omitted, it will be added in the code. <sensitivity.analysis> <quantiles> <sigma>-3</sigma> <sigma>-2</sigma> <sigma>-1</sigma> <sigma>1</sigma> <sigma>2</sigma> <sigma>3</sigma> </quantiles> <variable>GPP</variable> <perpft>TRUE</perpft> <start.year>2004</start.year> <end.year>2006</end.year> </sensitivity.analysis> quantiles/sigma : [optional] The number of standard deviations relative to the standard normal (i.e. “Z-score”) for which to perform the ensemble analysis. For instance, <sigma>1</sigma> corresponds to the quantile associated with 1 standard deviation greater than the mean (i.e. 0.681). Use a separate <sigma> tag, all under the <quantiles> tag, to specify multiple quantiles. Note that we do not automatically add the quantile associated with -sigma – i.e. if you want +/- 1 standard deviation, then you must include both <sigma>1</sigma> and <sigma>-1</sigma>. start.date : [required?] start date of the sensitivity analysis (in YYYY/MM/DD format) end.date : [required?] end date of the sensitivity analysis (in YYYY/MM/DD format) NOTE: start.date and end.date are distinct from values set in the run tag because this analysis can be done over a subset of the run. variable : [optional] name of one (or more) variables the analysis should be run for. If not specified, sensitivity.analysis variable is used, otherwise default is GPP. perpft : [optional] if TRUE a sensitivity analysis on PFT-specific outputs will be run. This is only possible if your model provides PFT-specific outputs for the variable requested. This tag only affects the output processing, not the number of samples proposed for the analysis nor the model execution. This information is currently used by the following PEcAn workflow functions: PEcAn.<MODEL>::write.configs.<MODEL> – See above PEcAn.uncertainty::run.sensitivity.analysis – Executes the uncertainty analysis 12.2.3 Parameter Data Assimilation The following tags can be used for parameter data assimilation. More detailed information can be found here: Parameter Data Assimilation Documentation 12.2.4 Multi-Settings Multi-settings allows you to do multiple runs across different sites. This customization can also leverage site group distinctions to expedite the customization. It takes your settings and applies the same settings, changing only the site level tags across sites. To start, add the multisettings tag within the <run></run> section of your xml <multisettings> <multisettings>run</multisettings> <multisettings> Additional tags for this section exist and can fully be seen here: <multisettings> <multisettings>assim.batch</multisettings> <multisettings>ensemble</multisettings> <multisettings>sensitivity.analysis</multisettings> <multisettings>run</multisettings> </multisettings> These tags correspond to different pecan analysis that need to know that there will be multiple settings read in. Next you’ll want to add the following tags to denote the group of sites you want to use. It leverages site groups, which are defined in BETY. <sitegroup> <id>1000000022</id> </sitegroup> If you add this tag, you must remove the <site> </site> tags from the <run> tag portion of your xml. The id of your sitegroup can be found by lookig up your site group within BETY. You do not have to use the sitegroup tag. You can manually add multiple sites using the structure in the example below. Lastly change the top level tag to <pecan.multi>, meaning the top and bootom of your xml should look like this: <?xml version="1.0"?> <pecan.multi> ... </pecan.multi> Once you have defined these tags, you can run PEcAn, but there may be further specifications needed if you know that different data sources have different dates available. Run workflow.R up until # Write pecan.CHECKED.xml PEcAn.settings::write.settings(settings, outputfile = "pecan.CHECKED.xml") Once this section is run, you’ll need to open pecan.CHECKED.xml. You will notice that it has expanded from your original pecan.xml. <run> <settings.1> <site> <id>796</id> <met.start>2005/01/01</met.start> <met.end>2011/12/31</met.end> <name>Bartlett Experimental Forest (US-Bar)</name> <lat>44.06464</lat> <lon>-71.288077</lon> </site> <start.date>2005/01/01</start.date> <end.date>2011/12/31</end.date> <inputs> <met> <path>/fs/data1/pecan.data/dbfiles/AmerifluxLBL_SIPNET_site_0-796/AMF_US-Bar_BASE_HH_4-1.2005-01-01.2011-12-31.clim</path> </met> </inputs> </settings.1> <settings.2> <site> <id>767</id> <met.start>2001/01/01</met.start> <met.end>2014/12/31</met.end> <name>Morgan Monroe State Forest (US-MMS)</name> <lat>39.3231</lat> <lon>-86.4131</lon> </site> <start.date>2001/01/01</start.date> <end.date>2014/12/31</end.date> <inputs> <met> <path>/fs/data1/pecan.data/dbfiles/AmerifluxLBL_SIPNET_site_0-767/AMF_US-MMS_BASE_HR_8-1.2001-01-01.2014-12-31.clim</path> </met> </inputs> </settings.2> .... </run> The ... replaces the rest of the site settings for however many sites are within the site group. Looking at the example above, take a close look at the <met.start></met.start> and <met.end></met.end>. You will notice that for both sites, the dates are different. In this example they were edited by hand to include the dates that are available for that site and source. You must know your source prior. Only the source CRUNCEP has a check that will tell you if your dates are outside the range available. PEcAn will automatically populate these dates across sites according the original setting of start and end dates. In addition, you will notice that the <path></path> section contains the model specific meteorological data file. You can add that in by hand or you can you can leave the normal tags that met process workflow will use to process the data into your model specific format: <met> <source>AmerifluxLBL</source> <output>SIPNET</output> <username>pecan</username> </met> 12.2.5 (experimental) State Data Assimilation The following tags can be used for state data assimilation. More detailed information can be found here: State Data Assimilation Documentation <state.data.assimilation> <process.variance>TRUE</process.variance> <aqq.Init>1</aqq.Init> <bqq.Init>1</bqq.Init> <sample.parameters>FALSE</sample.parameters> <adjustment>TRUE</adjustment> <censored.data>FALSE</censored.data> <FullYearNC>TRUE</FullYearNC> <NC.Overwrite>FALSE</NC.Overwrite> <NC.Prefix>sipnet.out</NC.Prefix> <q.type>SINGLE</q.type> <free.run>FALSE</free.run> <Localization.FUN>Local.support</Localization.FUN> <scalef>1</scalef> <chains>5</chains> <state.variables> <variable> <variable.name>AbvGrndWood</variable.name> <unit>MgC/ha</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>LAI</variable.name> <unit></unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SoilMoistFrac</variable.name> <unit></unit> <min_value>0</min_value> <max_value>100</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>kg/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> </state.variables> <Obs_Prep> <Landtrendr_AGB> <AGB_input_dir>/projectnb/dietzelab/dongchen/Multi-site/download_500_sites/AGB</AGB_input_dir> <allow_download>TRUE</allow_download> <export_csv>TRUE</export_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </Landtrendr_AGB> <MODIS_LAI> <search_window>30</search_window> <export_csv>TRUE</export_csv> <run_parallel>TRUE</run_parallel> <timestep> <unit>year</unit> <num>1</num> </timestep> </MODIS_LAI> <SMAP_SMP> <search_window>30</search_window> <export_csv>TRUE</export_csv> <update_csv>FALSE</update_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </SMAP_SMP> <Soilgrids_SoilC> <timestep> <unit>year</unit> <num>1</num> </timestep> </Soilgrids_SoilC> <outdir>/projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS</outdir> <start.date>2012-07-15</start.date> <end.date>2021-07-15</end.date> </Obs_Prep> <spin.up> <start.date>2004/01/01</start.date> <end.date>2006/12/31</end.date> </spin.up> <forecast.time.step>1</forecast.time.step> <start.date>2004/01/01</start.date> <end.date>2006/12/31</end.date> </state.data.assimilation> process.variance : [optional] TRUE/FLASE flag for if process variance should be estimated (TRUE) or not (FALSE). If TRUE, a generalized ensemble filter will be used. If FALSE, an ensemble Kalman filter will be used. Default is FALSE. aqq.Init : [optional] The initial value of aqq used for estimate the Q distribution, the default value is 1 (note that, the aqq.init and bqq.init right now only work on the VECTOR q type, and we didn’t account for the variabilities of them across sites or variables, meaning we initialize the aqq and bqq given single value). bqq.Init : [optional] The initial value of bqq used for estimate the Q distribution, the default value is 1. sample.parameters : [optional] TRUE/FLASE flag for if parameters should be sampled for each ensemble member or not. This allows for more spread in the intial conditions of the forecast. adjustment : [optional] Bool variable decide if you want to adjust analysis results by the likelihood. censored.data : [optional] Bool variable decide if you want to do MCMC sampling for the forecast ensemble space, the default is FALSE. FullYearNC : [optional] Bool variable decide if you want to generate the full-year netcdf file when there is a overlap in time, the default is TRUE. NC.Overwrite : [optional] Bool variable decide if you want to overwrite the previous netcdf file when there is a overlap in time, the default is FALSE. NC.Prefix : [optional] The prefix for the generation of the full-year netcdf file, the default is sipnet.out. q.type : [optional] The type of process variance that will be estimated, the default is SINGLE. free.run : [optional] If it’s a free run without any observations, the default is FALSE. Localization.FUN : [optional] The localization function name for the localization operation, the default is Local.support. scalef : [optional] The scale parameter used for the localization operation, the smaller the value is, the sites are more isolated. chains : [optional] The number of chains needed to be estimated during the MCMC sampling process. NOTE: If TRUE, you must also assign a vector of trait names to pick.trait.params within the sda.enkf function. state.variable : [required] State variable that is to be assimilated (in PEcAn standard format, with pre-specified variable name, unit, and range). Four variables can be assimilated so far: including Aboveground biomass (AbvGrndWood), LAI, SoilMoistFrac, and Soil carbon (TotSoilCarb). Obs_Prep : [required] This section will be handled through the SDA_Obs_Assembler function, if you want to proceed with this function, this section is required. spin.up : [required] start.date and end.date for model spin up. NOTE: start.date and end.date are distinct from values set in the run tag because spin up can be done over a subset of the run. forecast.time.step : [optional] start.date and end.date for model spin up. start.date : [required?] start date of the state data assimilation (in YYYY/MM/DD format) end.date : [required?] end date of the state data assimilation (in YYYY/MM/DD format) NOTE: start.date and end.date are distinct from values set in the run tag because this analysis can be done over a subset of the run. 12.2.6 State Variables for State Data Assimilation The following tags can be used for documentation of state variables required for the SDA workflow. <state.data.assimilation> <state.variables> <variable> <variable.name>AbvGrndWood</variable.name> <unit>MgC/ha</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>LAI</variable.name> <unit></unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SoilMoistFrac</variable.name> <unit></unit> <min_value>0</min_value> <max_value>100</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>kg/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> </state.variables> </state.data.assimilation> variable : [required] Each of this section should include each state variables separately. variable.name : [required] The name of the state variable (same as what we call it in the model output file. for example, “SIPNET.out” file). unit : [required] The unit of the state variable (can be empty if it’s unitless). min_value : [required] The minimum range of the state variable. max_value : [required] The maximum range of the state variable. 12.2.7 Observation Preparation for State Data Assimilation The following tags can be used for observation preparation for the SDA workflow. <state.data.assimilation> <Obs_Prep> <Landtrendr_AGB> <AGB_indir>/projectnb/dietzelab/dongchen/Multi-site/download_500_sites/AGB</AGB_indir> <allow_download>TRUE</allow_download> <export_csv>TRUE</export_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </Landtrendr_AGB> <MODIS_LAI> <search_window>30</search_window> <export_csv>TRUE</export_csv> <run_parallel>TRUE</run_parallel> <timestep> <unit>year</unit> <num>1</num> </timestep> </MODIS_LAI> <SMAP_SMP> <search_window>30</search_window> <export_csv>TRUE</export_csv> <update_csv>FALSE</update_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </SMAP_SMP> <Soilgrids_SoilC> <timestep> <unit>year</unit> <num>1</num> </timestep> </Soilgrids_SoilC> <outdir>/projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS</outdir> <start.date>2012-07-15</start.date> <end.date>2021-07-15</end.date> </Obs_Prep> </state.data.assimilation> The Obs_Prep section is specifically designed for the SDA_OBS_Assembler function within the state.data.assimilation section, which helps construct the obs.mean and obs.cov objects more efficiently. Within the Obs_Prep section, the user needs to specify data streams that will be proceeded in the assembler function as DataSource_AbbreviationOfData (for example: Landtrendr_AGB for Landtrendr aboveground biomass observations, or Soilgrids_SoilC for soilgrids soil organic carbon concentration). There are four functions that can be used for preparing observations: MODIS_LAI_prep, Soilgrids_SoilC_prep, SMAP_SMP_prep, and Landtrendr_AGB_prep functions. Other than that, the output directory, and start and end date need to be set within the Obs_Prep section, note that if you choose to export csv file for any data, you will get the corresponding csv file just under the outdir, for the Rdata of obs.mean and obs.cov objects, they will be stored whthin the Rdata folder within the outdir, if the Rdata folder is not created, then the assembler will help to create this folder automatically. * outdir : [required] This tag is for every observation preparation function, which points to the directory where the csv file associated with each data stream should be generated, read, or updated. * start.date : [required] This tag defines the exact start point of time, which differs from the start.date in the state.data.assimilation section in a way that it is the time when first observation should be taken. * end.date : [required] This tag defines the exact end point of time, which differs from the end.date in the state.data.assimilation section in a way that it is the time when last observation should be taken. Within each data-specific section (for example, within Landtrendr_AGB section), there are few general settings need to be set like the export_csv and timestep arguments. * export_csv : [optional] This tag is for every observation preparation function, which decide if we want to export the csv file to the outdir. Normally the outdir should be given in order to set export_csv as TRUE. The default value is TRUE. * timestep : [optional] This tag defines the time unit and how many unit time per each time step. It supports year, month, week, and day as time unit. If you assign a general timestep just under the Obs_Prep section, all variable will be handled with the same timestep. If you assign each variable with each timestep, then the assembler function will create obs.mean and obs.cov with mixed temporal resolution. The default value is list(unit=“year”, num=1). There are also several settings that are data-specific, for example, the AGB_indir only applies to Landtrendr AGB data that points to the directory where the AGB files are stored. Beyond that, the allow_download is also AGB specific, which decide if you want to download the AGB files if some are missing. * AGB_indir : [required] This tag is only for Landtrendr_AGB_prep function, which points to the directory where the pre-downloaded AGB files existed. * allow_download : [optional] This tag is only for Landtrendr_AGB_prep function, which decide if we want to proceed to download the AGB files (that will take a while, and a lot of space) if some of them are detected to be empty. The default value is FALSE. For the MODIS LAI and SMAP soil moisture observations, the search_window specifies how many days you want to search for the data with desired dates. Beyond that, the update_csv is specifically used for the SMAP data, which decide if you want to reproduce formatted csv file based on the downloaded SMAP_gee.csv file. For the run_parallel in MODIS LAI settings, it specify if you want to proceed the function parallely. * search_window : [optional] This tag is used for LAI and SMAP observation preparation functions, which defines the search window within which the closest observation to the target date is selected. The default value is 30 days. * update_csv : [optional] This tag is used for SMAP preparation function only, which decide if we want to update the CSV file. Normally, we will only set it to be TRUE if we redownload the SMAP_GEE csv file from Google Earth Engine, the tutorial can be found in the SMAP_SMP_prep function itself. The default value is FALSE. * run_parallel : [optional] This tag defines if you want to proceed the MODIS LAI function parallely, the default value is FALSE. 12.2.8 (experimental) Brown Dog This section describes how to connect to Brown Dog. This facilitates processing and conversions of data. <browndog> <url>...</url> <username>...</username> <password>...</password> </browndog> url: (required) endpoint for Brown Dog to be used. username: (optional) username to be used with the endpoint for Brown Dog. password: (optional) password to be used with the endpoint for Brown Dog. This information is currently used by the following R functions: PEcAn.data.atmosphere::met.process – Generic function for processing meteorological input data. PEcAn.benchmark::load_data – Generic, versatile function for loading data in various formats. 12.2.9 (experimental) Benchmarking Coming soon… 12.2.10 Remote data module This section describes the tags required for configuring remote_process. <remotedata> <out_get_data>...</out_get_data> <source>...</source> <collection>...</collection> <scale>...</scale> <projection>...</projection> <qc>...</qc> <algorithm>...</algorithm> <credfile>...</credfile> <out_process_data>...</out_process_data> <overwrite>...</overwrite> </remotedata> out_get_data: (required) type of raw output requested, e.g, bands, smap source: (required) source of remote data, e.g., gee or AppEEARS collection: (required) dataset or product name as it is provided on the source, e.g. “COPERNICUS/S2_SR” for gee or “SPL3SMP_E.003” for AppEEARS scale: (optional) pixel resolution required for some gee collections, recommended to use 10 for Sentinel 2 projection: (optional) type of projection. Only required for AppEEARS polygon AOI type qc: (optional) quality control parameter, required for some gee collections overwrite: (optional) if TRUE database checks will be skipped and existing data of same type will be replaced entirely. When processed data is requested, the raw data required for creating it will also be replaced. By default FALSE These tags are only required if processed data is requested: out_process_data: (optional) type of processed output requested, e.g, LAI algorithm: (optional) algorithm used for processing data, currently only SNAP is implemented to estimate LAI from Sentinel-2 bands credfile: (optional) absolute path to JSON file containing Earthdata username and password, only required for AppEEARS pro_mimetype: (optional) MIME type of the processed file pro_formatname: (optional) format name of the processed file Additional information for the module are taken from the registration files located at data.remote/inst/registration The output data from the module are returned in the following tags: raw_id: input id of the raw file raw_path: absolute path to the raw file pro_id: input id of the processed file pro_path: absolute path to the processed file "],["workflow.html", "13 PEcAn workflow (web/workflow.R)", " 13 PEcAn workflow (web/workflow.R) How the workflow works How each module is called How to do outside of web interface Link to “folder structure” section below for detailed descriptions "],["workflow-readsettings.html", "13.1 Read Settings", " 13.1 Read Settings (TODO: Under construction…) "],["workflow-input.html", "13.2 Input Conversions", " 13.2 Input Conversions "],["workflow-input-data.html", "13.3 Input Data", " 13.3 Input Data Models require input data as drivers, parameters, and boundary conditions. In order to make a variety of data sources that have unique formats compatible with models, conversion scripts are written to convert them into a PEcAn standard format. That format is a netcdf file with variables names and specified to our standard variable table. Within the PEcAn repository, code pertaining to input conversion is in the MODULES directory under the data.atmosphere and data.land directories. "],["workflow-input-initial.html", "13.4 Initial Conditions", " 13.4 Initial Conditions To convert initial condition data into the PEcAn Standard and then into the model formats we follow three main steps: Downloading vegetation info -converting the vegetation info into .rd file format Creating ensemble members based on veg file -resamples veg file to create ensmeble members Converting ensemble members to model specific format Common Questions regarding IC Data: How do I know if my data product is supported by ic_process workflow? -The current data products supported by ic_process are: NEON, FIA, and BADM. That said you if your vegetation files are registered with BETY and abide by the BETY formats and column names you will be able to use ic_process to create ensemble members and convert the ensemble members to model specific format. See section 6.3.3 for more info. Can I use ic_process outside of the PEcAn workflow? -Short answer is yes, ic_process can be called as a function all by itself outside of do_conversions. What about soil initial conditions? -Please reference section 6.3.6 for more info. ic_process workflow: Required inputs for ic_process are: settings object with and sections (see pecan.xml section for more info), dir or location where you would to store the output files, input object that contains all info from settings\\(run\\)inputs$poolinitcond, and the overwrite setting the default is overwrite = FALSE. get_veg_module() This function will either download your vegetation info from: NEON_veg, FIA, BADM sources or query BETY for existing veg files previously downloaded. The input and dbfile ids of your veg file are then added to a list getveg.id. ens_veg_module() This function will create ensemble member ncdf files by resampling the veg file supplied in getveg.id. put_veg_module() This function will convert the ensemble member ncdf files into model specific format. Currently the supported models are ED2 and SIPNET. "],["workflow-met.html", "13.5 Meteorological Data", " 13.5 Meteorological Data To convert meterological data into the PEcAn Standard and then into model formats we follow four main steps: Downloading raw data - Currently supported products - Example Code Converting raw data into a CF standard - Example Code Downscaling and gapfilling - Example Code Coverting to Model Specific format - Example Code Common Questions regarding Met Data: How do I add my Meterological data product to PEcAn? How do I use PEcAn to convert Met data outside the workflow? The main script that handles Met Processing, is met.process. It acts as a wrapper function that calls individual modules to facilitate the processing of meteorological data from it’s original form to a pecan standard, and then from that standard to model specific formats. It also handles recording these processes in the BETY database. Downloading raw data - Available Meteorological Drivers - Example Code to download Ameriflux data Converting raw data into a CF standard (if needed) - Example Code to convert from raw csv to CF standard Downscaling and gapfilling(if needed) - Example Code to gapfill Coverting to Model Specific format - Example Code to convert Standard into Sipnet format 13.5.1 Downloading Raw data (Description of Process) Given the information passed from the pecan.xml met.process will call the download.raw.met.module to facilitate the execution of the necessary functions to download raw data. <met> <source>AmerifluxLBL</source> <output>SIPNET</output> <username>pecan</username> </met> 13.5.2 Converting raw data to PEcAn standard 13.5.3 Downscaling and gapfilling (optional) 13.5.4 Converting from PEcAn standard to model-specific format "],["workflow-traits.html", "13.6 Traits", " 13.6 Traits (TODO: Under construction) "],["workflow-metaanalysis.html", "13.7 Meta Analysis", " 13.7 Meta Analysis (TODO: Under construction) "],["workflow-modelconfig.html", "13.8 Model Configuration", " 13.8 Model Configuration (TODO: Under construction) "],["workflow-modelrun.html", "13.9 Run Execution", " 13.9 Run Execution (TODO: Under construction) "],["workflow-postrun.html", "13.10 Post Run Analysis", " 13.10 Post Run Analysis (TODO: Under construction) ## Advanced Analysis {#workflow-advanced} (TODO: Under construction) "],["pecan-models.html", "14 PEcAn Models", " 14 PEcAn Models This section will contain information about all models and output variables that are supported by PEcAn. Model Name Available in the VM Prescribed Inputs Input Functions/Values Restart Function BioCro Yes Yes Yes No CLM No No No No DALEC Yes Yes Yes No ED2 Yes Yes Yes Yes FATES No Yes No GDAY No No No No LDNDC No Yes No No LINKAGES Yes Yes Yes Yes LPJ-GUESS No Yes No No MAESPA Yes Yes No No PRELES Yes Yes Partially No SiPNET Yes Yes Yes Yes STICS Yes Yes No No Available in the VM - Denotes if a model is publicly available with PEcAn. Prescribed Inputs - Denotes whether or not PEcAn can prescribe inputs. Input Functions/Values - Denotes whether or not PEcAn has functions to fully produce a model’s Input values. Restart Function - Denotes status of model data assimilation capabilities. Output Variables PEcAn converts all model outputs to a single Output Standards. This standard evolved out of MsTMIP project, which is itself based on NACP, LBA, and other model-intercomparison projects. This standard was expanded for the PalEON MIP and the needs of the PEcAn modeling community to support variables not in these standards. Model developers: do not add variables to your PEcAn output without first adding them to the PEcAn standard table! Also, do not create new variables equivalent to existing variables but just with different names or units. "],["models-biocro.html", "14.1 BioCro", " 14.1 BioCro Model Information Home Page https://github.com/ebimodeling/biocro/blob/0.951/README.md Source Code https://github.com/ebimodeling/biocro/tree/0.951 License University of Illinois/NCSA Open Source License Authors Fernando E. Miguez, Deepak Jaiswal, Justin McGrath, David LeBauer, Scott Rohde, Dan Wang PEcAn Integration David LeBauer, Chris Black, Kristina Riemer 14.1.1 Introduction BioCro is a model that estimates photosynthesis at the leaf, canopy, and ecosystem levels and determines plant biomass allocation and crop yields, using underlying physiological and ecological processes to do so. 14.1.2 PEcAn configuration file additions The following sections of the PEcAn XML are relevant to the BioCro model: model revision – Model version number run site/id – ID associated with desired site from BETYdb site entry inputs met/output – Set as BIOCRO met/path – Path to file containing meteorological data 14.1.3 Model specific input files List of inputs required by model, such as met, etc. 14.1.4 Model configuration files Genus-specific parameter files are secretly required. These are stored in the PEcAn.BIOCRO package and looked up under the hood. write.configs.BIOCRO looks for defaults in this order: first any file at a path specified by settings$pft$constants$file, next by matching the genus name in datasets exported by the BioCro package, and last by matching the genus name in PEcAn.BIOCRO’s extdata/defaults directory. When adding a new genus, it is necessary to provide a new default parameter file in PEcAn.BIOCRO inst/extdata/defaults and also (for v<1.0) update the call_biocro() function. BioCro uses a config.xml file similar to ED2. At this time, no other template files are required. 14.1.5 Installation notes BioCro can be run standalone using the model’s R package. Instructions for installing and using the package are in the GitHub repo’s README file. PEcAn works with v. 0.9x and 1.x. Support for v 2.x is not implemented. Support for v 0.9x is most robust and tested. To install, use remotes::install_github('ebimodeling/biocro@0.951') "],["models-clm.html", "14.2 CLM", " 14.2 CLM Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-dalec.html", "14.3 DALEC", " 14.3 DALEC Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-ed.html", "14.4 ED2", " 14.4 ED2 Model Information Home Page http://moorcroftlab.oeb.harvard.edu/ Source Code https://github.com/EDmodel/ED2 License Authors Paul Moorcroft, … PEcAn Integration Michael Dietze, Rob Kooper 14.4.1 Introduction Introduction about ED model 14.4.2 PEcAn configuration file additions The following sections of the PEcAn XML are relevant to the ED model: model id – BETY model ID. Each ID corresponds to a different revision of ED (see below) revision – The revision (a.k.a. release) number of ED (e.g. “r82”). “rgit” indicates the latest code on the ED repository. edin – Name of the template ED2IN configuration file. If this is a functional path that points to a specific file, that file is used. If no file is found following the path literally, the workflow will try the path relative to the PEcAn.ED2 package using system.file (recall that files in inst are moved in the package root, but all other directory structure is preserved). If this is omitted, PEcAn.ED2::write.configs.ED2 will look for a file called ED2IN.<revision> (e.g. ED2IN.rgit, ED2IN.r86) in the PEcAn.ED2 package. Example: <edin>ED2IN.rgit</edin> will use the ED2IN.rgit file shipped with PEcAn.ED2 regardless of the revision of ED used. (Note however that if a file called ED2IN.rgit exists in the workflow runtime directory, that file will be used instead). start.date, end.date – Run start and end date, respectively met.start, met.end – Start and end year of meteorology inputs. By default (if omitted), these are set to the years of start.date and end.date, respectively. Setting these values to a shorter interval than start.date and end.date will cause ED to loop the meteorology input over the specified years. This may be useful for, for example, spinning up ED under a set of constant climate conditions. phenol.scheme phenol phenol.start phenol.end all_pfts – (Logical) If false or missing (default), only run ED2 with the PFTs configured via PEcAn (i.e. in the <pfts> section of the XML). If true, run with all 17 of ED2’s PFTs, using ED2’s internal default parameters for all PFTs not configured through PEcAn. See below for more details. ed2in_tags – Named list of additional tags in the ED2IN to be modified. These modifications override any of those set by other parts of the PEcAn workflow. These tags must be in all caps. Any tags that are not already in the ED2IN file will be added; this makes this an effective way to run newer versions of ED2 that have new ED2IN parameters without having to provide an entire new ED2IN. For example: <model> <ed2in_tags> <IOOUTPUT>0</IOOUTPUT> <PLANT_HYDRO_SCHEME>0</PLANT_HYDRO_SCHEME> <ISTOMATA_SCHEME>0</ISTOMATA_SCHEME> <ISTRUCT_GROWTH_SCHEME>0</ISTRUCT_GROWTH_SCHEME> <TRAIT_PLASTICITY_SCHEME>0</TRAIT_PLASTICITY_SCHEME> </ed2in_tags> </model> barebones_ed2in – Whether or not to try to annotate the ED2IN file with comments. If “true”, skip all comments and only write the tags themselves. If “false” (default), try to transfer comments from the template file into the target file. jobtemplate prerun – String of commands to be added to the job.sh model execution script before the model is run. Multiple commands should be separated by proper bash syntax – i.e. either with && or ;. One common use of this argument is to load modules on some HPC systems – for instance: <prerun>module load git hdf5</prerun> If your particular version of ED is failing early during execution with a mysterious “Segmentation fault”, that may indicate that its process is exceeding its stack limit. In this case, you may need to remove the stack limit restriction with a prerun command like the following: <prerun>ulimit -s unlimited</prerun> postrun – Same as <prerun>, but for commands to be run after model execution. binary – The full path to the ED2 binary on the target machine. binary_args – Additional arguments to be passed to the ED2 binary. Some common arguments are: -s – Delay OpenMPI initialization until the last possible moment. This is needed when running ED2 in a Docker container. It is included by default when the host is rabbitmq. -f /path/to/ED2IN – Full path to a specific ED2IN namelist file. Typically, this is not needed because, by default, ED searches for the ED2IN in the current directory and the PEcAn workflow places the ED2IN file and a symbolic link to the ED executable in the same (run) directory for you. run/site lat – Latitude coordinate of site lon – Longitude coordinate of site inputs met/path – Path to ED_MET_DRIVER_HEADER file pss: [required] location of patch file css: [required] location of cohort file site: [optional] location of site file lu: [required] location of land use file thsums: [required] location of thermal sums file veg: [required] location of vegetation data soil: [required] location of soil data 14.4.3 PFT configuration in ED2 ED2 has more detailed PFTs than many models, and a more complex system for configuring these PFTs. ED2 has 17 PFTs, based roughly on growth form (e.g. tree vs. grass), biome (tropical vs. temperate), leaf morphology (broad vs. needleleaf), leaf phenology (evergreen vs. deciduous), and successional status (e.g. early, mid, or late). Each PFT is assigned an integer (1-17), which is used by the ED model to assign default model parameters. The mappings of these integers onto PFT definitions are not absolute, and may change as the ED2 source code evolves. Unfortunately, the only authoritative source for these PFT definitions for any given ED2 version is the Fortran source code of that version. The following is the mapping as of ED2 commit 24e6df6a (October 2018): C4 grass Early-successional tropical Mid-successional tropical Late-successional tropical Temperate C3 grass Northern pine Southern pine Late-successional conifer Early-successional temperate deciduous Mid-successional temperate deciduous Late-successional temperate deciduous Agricultural (crop) 1 Agricultural (crop) 2 Agricultural (crop) 3 Agricultural (crop) 4 Subtropical C3 grass (C4 grass with C3 photosynthesis) “Araucaria” (non-optimized southern pine), or liana ED2 parameter defaults are hard-coded in its Fortran source code. However, most parameters can be modified via an XML file (determined by the ED2IN IEDCNFGF field; usually config.xml in the same directory as the ED2IN file). The complete record of all parameters (defaults and user overrides) used by a given ED2 run is stored in a history.xml file (usually in the same directory as the ED2IN) – this is the file to check to make sure that an ED2 run is parameterized as you expect. As with other models, PEcAn can set ED2 parameters using its built-in trait meta analysis. The function specifically responsible for writing the config.xml is PEcAn.ED2::write.config.xml.ED2 (which is called as part of the more general PEcAn.ED2::write.config.ED2). The configuration process proceeds as follows: First, the mappings between PEcAn PFT names and ED2 PFT numbers are determined according to the following logic: If the PFT has a <ed2_pft_number> XML tag, that number is used. For example: <pft> <name>umbs.early_hardwood</name> <ed2_pft_number>9</ed2_pft_number> </pft> If <ed2_pft_number> is not provided, the code tries to cross-reference the PFT name against the pftmapping.csv data provided with the PEcAn.ED2 package. If the name is not matched (perfectly!), the function will exit with an error. Second, the PFT number from the previous step is used to write that PFT’s parameters to the config.xml. The order of precedence for parameters is as follows (from highest to lowest): Explicit user overrides. These are specified via a <constants> tag in the PFT definition in the pecan.xml. For example: <pft> <name>umbs.early_hardwood</name> <constants> <sla>36.3</sla> </constants> </pft> Note that these values are passed through PEcAn.ED2::convert.samples.ED, so they should generally be given in PEcAn’s default units rather than ED2’s. Samples from the PEcAn meta analysis. These are also converted via PEcAn.ED2::convert.samples.ED. ED2 defaults that PEcAn knows about. These are stored in the edhistory.csv file inside of PEcAn.ED2 This file is re-generated manually whenever there is a new version of ED2, so while we try our best to keep it up to date, there is no guarantee that it is. (Implicitly) Defaults in the ED2 Fortran source code. In general, our goal is to set all parameters through PEcAn (via steps 1-3), but if you are running PEcAn with new or experimental versions of ED2, you should be extra careful to make sure ED2 is running with the parameters you intend. Again, the best way to know which parameters ED2 is actually using is to check the history.xml file produced once the run starts. The ED2IN field INCLUDE_THESE_PFT controls which of these PFTs are included in a given ED2 run. By default, PEcAn will set this field to only include the PFTs specified by the user. This is the recommended behavior because it ensures that all PFTs in ED2 were parameterized (one way or another, at least partially) through PEcAn. However, if you would like ED2 to run with all 17 PFTs (NOTE: using ED2’s internal defaults for all PFTs not specified by the user!), you can set the <all_pfts> XML tag (in the <model> section) to true: <model> <all_pfts>true</all_pfts> </model> 14.4.4 Model specific input files List of inputs required by model, such as met, etc. 14.4.5 Model configuration files ED2 is configured using 2 files which are placed in the run folder. ED2IN : template for this file is located at models/ed/inst/ED2IN.<revision>. The values in this template that need to be modified are described below and are surrounded with @ symbols. config.xml : this file is generated by PEcAn. Some values are stored in the pecan.xml in <pfts><pft><constants> section as well as in <model> section. An example of the template can be found in ED2IN.r82 The ED2IN template can contain the following variables. These will be replaced with actual values when the model configuration is written. **@ENSNAME@** : run id of the simulation, used in template for NL%EXPNME **@START_MONTH@** : start of simulation UTC time, from <run><start.date>, used in template for NL%IMONTHA **@START_DAY@** : start of simulation UTC time, from <run><start.date>, used in template for NL%IDATEA **@START_YEAR@** : start of simulation UTC time, from <run><start.date>, used in template for NL%IYEARA **@END_MONTH@** : end of simulation UTC time, from <run><end.date>, used in template for NL%IMONTHZ **@END_DAY@** : end of simulation UTC time, from <run><end.date>, used in template for NL%IDATEZ **@END_YEAR@** : end of simulation UTC time, from <run><end.date>, used in template for NL%IYEARZ **@SITE_LAT@** : site latitude location, from <run><site><lat>, used in template for NL%POI_LAT **@SITE_LON@** : site longitude location, from <run><site><lon>, used in template for NL%POI_LON **@SITE_MET@** : met header location, from <run><site><met>, used in template for NL%ED_MET_DRIVER_DB **@MET_START@** : first year of met data, from <run><site><met.start>, used in template for NL%METCYC1 **@MET_END@** : last year of met data, from <run><site><met.end>, used in template for NL%METCYCF **@PHENOL_SCHEME@** : phenology scheme, if this variabe is 1 the following 3 fields will be used, otherwise they will be set to empty strings, from <model><phenol.scheme>, used in template for NL%IPHEN_SCHEME **@PHENOL_START@** : first year for phenology, from <model><phenol.start>, used in template for NL%IPHENYS1 and NL%IPHENYF1 @PHENOL_END@** : last year for phenology, from <model><phenol.end>, used in template for NL%IPHENYSF and NL%IPHENYFF @PHENOL@** : path and prefix of the prescribed phenology data, from * <model><phenol>, used in template for NL%PHENPATH **@SITE_PSSCSS@** : path and prefix of the previous ecosystem state, from <model><psscss>, used in template for NL%SFILIN **@ED_VEG@** : path and prefix of the vegetation database, used only to determine the land/water mask, from <model><veg>, used in template for NL%VEG_DATABASE **@ED_SOIL@** : path and prefix of the soil database, used to determine the soil type, from <model><soil>, used in template for NL%SOIL_DATABASE **@ED_INPUTS@** : input directory with dataset to initialise chilling degrees and growing degree days, which is used to drive the cold-deciduous phenology, from <model><inputs>, used in template for NL%THSUMS_DATABASE **@FFILOUT@** : path and prefix for analysis files, generated from <run><host><outdir>/run.id/analysis, used in template for NL%FFILOUT **@SFILOUT@** : path and prefix for history files, generated from <run><host><outdir>/run.id/history, used in template for NL%SFILOUT **@CONFIGFILE@** : XML file containing additional parameter settings, this is always “config.xml”, used in template for NL%IEDCNFGF @OUTDIR@** : location where output files are written (without the runid**), from <run><host><outdir>, should not be used. **@SCRATCH@** : local scratch space for outputs, generated /scratch/<username>/run$scratch, should not be used right now since it only works on ebi-cluster 14.4.6 Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. 14.4.6.1 VM 14.4.6.2 BU geo 14.4.6.3 TACC lonestar module load hdf5 curl -o ED.r82.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r82.tgz tar zxf ED.r82.tgz rm ED.r82.tgz cd ED.r82/ED/build/bin curl -o include.mk.lonestar http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.lonestar make OPT=lonestar 14.4.6.4 TACC stampede module load hdf5 curl -o ED.r82.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r82.tgz tar zxf ED.r82.tgz rm ED.r82.tgz cd ED.r82/ED/build/bin curl -o include.mk.stampede http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.stampede make OPT=stampede "],["models-gday.html", "14.5 GDAY", " 14.5 GDAY Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-ldndc.html", "14.6 LDNDC", " 14.6 LDNDC Model Information Home Page https://ldndc.imk-ifu.kit.edu/about/model.php Source Code License Authors Prof. Dr. Klaus Butterbach-Bahl, Dr. Edwin Haas, … PEcAn Integration Henri Kajasilta 14.6.1 Introduction LandscapeDNDC is designed to simulate biosphere/hydrosphere processes of forest, arable and grassland ecosystems. It is modularly designed and therefore allows for various model selections to be plugged in. Most up-to-date information from LDNDC can be found here, as well as the authors, users guide and documentation of the model: https://ldndc.imk-ifu.kit.edu/index.php PEcAn configuration file additions The following sections of the PEcAn XML are relevant or have influence at the moment: ensemble variable: Options are only LAI and GPP model id: Corresponding the id of LDNDC in BETY run inputs start_date: Start date of the simulation end_date: End date of the simulation 14.6.2 Model specific input files LDNDC takes several files as an input, and can also generate multiple output files. These ńotes regarding input files are written from the PEcAn integration point of view, and detailed information about input files can be found from the user guide, which is available via the above links. Input files: Project — Contains essential information in order to set-up the simulation run. For example, the file contains information of other files which will be used in the simulations as well as the time period when the simulations are taking a place. Setup — Defines which modules are used. Climate — Met data that is used in simulations. Speciesparameters — Species used in the simulations should be defined here and the file holds the approppriate information of the used species. Siteparameters — Works similarly as species parameters, but in the site features point of view. Event — Holds information about the events, which are essential in arable simulations. Site — Specific information about the site. Airchemistry — Information about the airchemistry. 14.6.2.1 Model configuration files Due to the amount of different input files, there are several templates which are used in model configuration. Templates are located at models/ldndc/inst/ and are copied to the simulation’s run directory before the model run. project.ldndc : Project file is populated in write.configs.LDNDC based on the inputs of the PEcAn run. In addition to having the correct input files that are used in the simulation, the project file is filled in with the simulation time period and the directory paths regarding other input files that are read during the simulation. The file also sets the path for output files. These other input files are found from the same directory as the project file when starting the model run, and the output file is created among the other outputs of the simulation. At the moment, simulations are executed half-hourly. template.job : Will be filled in with the information that model execution requires in PEcAn. Calls for example models binary and executes the project.ldndc file. events_template.xml : Has three events (plant, manure and cut), which are filled in in write.configs.LDNDC. Events should be generated based on some event file, but such implementation has not been applied yet. This template is only meant for testing for now. speciesparameter_template.xml : Has one replacable section. This section is filled in within write.configs.LDNDC, and is currently just hard-coded values for perennial grass species. This will be one of the key files, because the used pfts and corresponding pft values are written here. At this stage, it is hard-coded to only have one pft, but it should be generated based on the trait values that are available from BETY database. siteparameters.xml : Some default value for siteparameters. site.xml : Some default values for site. airchemistry.txt : Some default data for airchemistry. climate.txt : This file is created from the netcdf file. A symbolic link will be created into the run directory for it. setup.xml : Contains information which modules the model simulation is using. Default settings should fit for most of the purposes and these are currently hard-coded. 14.6.3 Installation notes Obtaining LDNDC program requires credentials, which the user is able to request from the developers of the model. With the credentials, the pre-compiled LDNDC program can be downloaded here: https://ldndc.imk-ifu.kit.edu/download/download-model.php After obtaining the necessary files, the user should execute the install script that is found from the ldndc-‘version-number’ directory. With linux, executing would happen with command sh install.sh. Succesfull installation will create a .ldndc directory to the user’s home directory. Running the simulations happens by calling the ldndc executable (found from the /bin directory) and giving the path file to the project file that contains the specs of the simulation. Instructions in detail and how to play with these setups are explained more broadly in the user guide. "],["models-linkages.html", "14.7 LINKAGES", " 14.7 LINKAGES Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-lpjguess.html", "14.8 LPJ-GUESS", " 14.8 LPJ-GUESS Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-maespa.html", "14.9 MAESPA", " 14.9 MAESPA Model Information Home Page http://maespa.github.io/ Source Code http://maespa.github.io/download.html License Authors Belinda Medlyn and Remko Duursma PEcAn Integration Tony Gardella, Martim DeKauwe, Remki Duursma Introduction PEcAn configuration file additions Model specific input files Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes Installing the MAESPA model requires cloning the MAESPA Bitbucket Repository, executing the makefile, and ensuring that the Maeswarp R package is correctly installed. To clone and compile the model, execute this code at the command line git clone https://bitbucket.org/remkoduursma/maespa.git cd maespa make clean make maespa.out is your executable. Example input files can be found in the inputfiles directory. Executing measpa.out from within one of the example directories will produce output. MAESPA developers have also developed a wrapper package called Maeswrap. The usual R package installation method install.packages may present issues with downloading an unpacking a dependency package called rgl. Here are a couple of solutions: Solution 1 From the Command Line sudo apt-get install r-cran-rgl then from within R install.packages("Maeswrap") Solution 2 From the Command line sudo apt-get install libglu1-mesa-dev then from within R install.packages("Maeswrap") This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-preles.html", "14.10 PRELES", " 14.10 PRELES Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-sipnet.html", "14.11 SiPNET", " 14.11 SiPNET Model Information Home Page Source Code License Authors PEcAn Integration Michael Dietze, Rob Kooper Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files SIPNET is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. sipnet.in : template for this file is located at models/sipnet/inst/sipnet.in and is not modified. sipnet.param-spatial : template for this file is located at models/sipnet/inst/template.param-spatial and is not modified. sipnet.param : template for this file is in models/sipnet/inst/template.param or it is specified in the <model> section as <default.param>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. SIPNET version unk: if [ ! -e ${HOME}/sipnet_unk ]; then cd curl -o sipnet_unk.tar.gz http://isda.ncsa.illinois.edu/~kooper/PEcAn/models/sipnet_unk.tar.gz tar zxf sipnet_unk.tar.gz rm sipnet_unk.tar.gz fi cd ${HOME}/sipnet_unk/ make clean make sudo cp sipnet /usr/local/bin/sipnet.runk make clean SIPNET version 136: if [ ! -e ${HOME}/sipnet_r136 ]; then cd curl -o sipnet_r136.tar.gz http://isda.ncsa.illinois.edu/~kooper/EBI/sipnet_r136.tar.gz tar zxf sipnet_r136.tar.gz rm sipnet_r136.tar.gz sed -i 's#$(LD) $(LIBLINKS) \\(.*\\)#$(LD) \\1 $(LIBLINKS)#' ${HOME}/sipnet_r136/Makefile fi cd ${HOME}/sipnet_r136/ make clean make sudo cp sipnet /usr/local/bin/sipnet.r136 make clean VM "],["models-stics.html", "14.12 STICS", " 14.12 STICS Model Information Home Page https://www6.paca.inrae.fr/stics/ Source Code License Authors PEcAn Integration Istem Fer Introduction STICS (Simulateur mulTIdisciplinaire pour les Cultures Standard) is a crop model that has been developed since 1996 at INRA (French National Institute for Agronomic Research) in collaboration with other research (CIRAD, Irstea, Ecole des Mines de Paris, ESA, LSCE) or professional (ARVALIS, Terres Inovia, CTIFL, ITV, ITB, Agrotransferts, etc.) and teaching institutes or organizations. PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files STICS is configured using different XML files located in two fixed directories: config, plant and user defined workspace(s) directorie(s). A java app called JavaStics allows users to generate these files. Installation notes The software (JavaStics interface and STICS model) is available for download after a registration procedure (see procedure at http://www6.paca.inra.fr/stics_eng/ Download). VM "],["met-drivers.html", "15 Available Meteorological Drivers ", " 15 Available Meteorological Drivers "],["ameriflux.html", "15.1 Ameriflux", " 15.1 Ameriflux Scale: site Resolution: 30 or 60 min Availability: varies by site http:\\/\\/ameriflux.lbl.gov\\/data\\/data-availability\\/ Notes: Old ORNL server, use is deprecated "],["amerifluxlbl.html", "15.2 AmerifluxLBL", " 15.2 AmerifluxLBL Scale: site Resolution: 30 or 60 min Availability: varies by site http:\\/\\/ameriflux.lbl.gov\\/data\\/data-availability\\/ Notes: new Lawrence Berkeley Lab server "],["fluxnet2015.html", "15.3 Fluxnet2015", " 15.3 Fluxnet2015 Scale: site Resolution: 30 or 60 min Availability: varies by site http://fluxnet.fluxdata.org/sites/site-list-and-pages Notes: Fluxnet 2015 synthesis product. Does not cover all FLUXNET sites "],["narr.html", "15.4 NARR", " 15.4 NARR Scale: North America Resolution: 3 hr, approx. 32km \\(Lambert conical projection\\) Availability: 1979-present "],["cruncep.html", "15.5 CRUNCEP", " 15.5 CRUNCEP Scale: global Resolution: 6hr, 0.5 degree Availability: 1901-2010 "],["cmip5.html", "15.6 CMIP5", " 15.6 CMIP5 Scale: varies by model Resolution: 3 hr Availability: 2006-2100 Currently only GFDL available. Different scenerios and ensemble members can be set via Advanced Edit. "],["nldas.html", "15.7 NLDAS", " 15.7 NLDAS Scale: Lower 48 + buffer, Resolution: 1 hour, .125 degree Availability: 1980-present "],["gldas.html", "15.8 GLDAS", " 15.8 GLDAS Scale: Global Resolution: 3hr, 1 degree Availability: 1948-2010 "],["paleon.html", "15.9 PalEON", " 15.9 PalEON Scale: -100 to -60 W Lon, 35 to 50 N Latitude \\(US northern hardwoods + buffer\\) Resolution: 6hr, 0.5 degree Availability: 850-2010 "],["fluxnetlathuile.html", "15.10 FluxnetLaThuile", " 15.10 FluxnetLaThuile Scale: site Resolution: 30 or 60 min Availability: varies by site http:\\/\\/www.fluxdata.org\\/DataInfo\\/Dataset%20Doc%20Lib\\/SynthDataSummary.aspx Notes: 2007 synthesis. Fluxnet2015 supercedes this for sites that have been updated "],["geostreams.html", "15.11 Geostreams", " 15.11 Geostreams Scale: site Resolution: varies Availability: varies by site Notes: This is a protocol, not a single archive. The PEcAn functions currently default to querying [https://terraref.ncsa.illinois.edu/clowder/api/geostreams], which requires login and contains data from only two sites (Urbana IL and Maricopa AZ). However the interface can be used with any server that supports the Geostreams API. "],["era5.html", "15.12 ERA5", " 15.12 ERA5 Scale: Global Resolution: 3 hrs and 31 km Availability: 1950-present Notes: It’s important to know that the raw ERA5 tiles needs to be downloaded and registered in the database first. Inside the inst folder in the data.atmosphere package there are R files for downloading and registering files in the BETY. However, it assumes that you have registered and setup your API requirements. Check out how to setup your API [here] (https://confluence.ecmwf.int/display/CKB/How+to+download+ERA5#HowtodownloadERA5-3-DownloadERA5datathroughtheCDSAPI). In the inst folder you can find two files (ERA5_db_register.R and ERA5_USA_download.R). If you setup your ecmwf account as it’s explained in the link above, ERA5_USA_download.R will help you to download all the tiles with all the variables required for pecan extract.nc.ERA5 function to generate pecan standard met files. Besides installing the required packages for this file, it should work from top to bottom with no problem. After downloading the tiles, there is simple script in ERA5_db_register.R which helps you register your tiles in the bety. met.process later on uses that entery to find the required tiles for extracting met data for your sites. There are important points about this file. 1- Make sure you don’t change the site id in the script (which is the same the ParentSite in ERA5 registeration xml file). 2- Make sure the start and end date in that script matches the downloaded tiles. Set your ERA5.files.path to where you downloaded the tiles and then the rest of the script should be working fine. "],["icos-drought-2018.html", "15.13 ICOS Drought 2018", " 15.13 ICOS Drought 2018 Scale: site Resolution: 30 min Availability: Varies by site Notes: To use this option, set source as ICOS and a product tag containing drought2018 in pecan.xml "],["icos-ecosystem-archive.html", "15.14 ICOS Ecosystem Archive", " 15.14 ICOS Ecosystem Archive Scale: site Resolution: 30 min Availability: Varies by site Notes: To use this option, set source as ICOS and a product tag containing etc in pecan.xml "],["download-gfdl.html", "15.15 Download GFDL", " 15.15 Download GFDL The Downlad.GFDL function assimilates 3 hour frequency CMIP5 outputs generated by multiple GFDL models. GFDL developed several distinct modeling streams on the timescale of CMIP5 and AR5. These models include CM3, ESM2M and ESM2G with a spatial resolution of 2 degrees latitude by 2.5 degrees longitude. Each model has future outputs for the AR5 Representative Concentration Pathways ranging from 2006-2100. "],["cm3.html", "15.16 CM3", " 15.16 CM3 GFDL’s CMIP5 experiments with CM3 included many of the integrations found in the long-term CMIP5 experimental design. The focus of this physical climate model is on the role of aerosols, aerosol-cloud interactions, and atmospheric chemistry in climate variability and climate change. "],["esm2m-esm2g.html", "15.17 ESM2M & ESM2G", " 15.17 ESM2M & ESM2G Two new models representing ocean physics with alternative numerical frameworks to explore the implications of some of the fundamental assumptions embedded in these models. Both ESM2M and ESM2G utilize a more advanced land model, LM3, than was available in ESM2.1 including a variety of enhancements (Milly et al., in prep). GFDL’s CMIP5 experiments with Earth System Models included many of the integrations found in the long-term CMIP5 experimental design. The ESMs, by design, close the carbon cycle and are used to study the impact of climate change on ecosystems, ecosystem changes on climate and human activities on ecosystems. For more information please navigate here CM# ESM2M ESM2G rcp26 r1i1p1 r1i1p1 rcp45 r1i1p1, r3i1p1,r5i1p1 r1i1p1 r1i1p1 rcp60 r1i1p1 r1i1p1 rcp85 r1i1p1 r1i1p1 r1i1p1 "],["pecan-project-api.html", "16 PEcAn Project API ", " 16 PEcAn Project API "],["introduction-4.html", "16.1 Introduction", " 16.1 Introduction Welcome to the PEcAn Project API Documentation. The Predictive Ecosystem Analyser (PEcAn) Project is an open source framework initiated to meet the demands for more accessible, transparent & repeatable modeling of ecosystems. PEcAn can be considered as an ecoinformatics toolbox combined with a set of workflows that wrap around ecosystem models that allow users to effectively perform data synthesis, propagation of uncertainty through a model & ecological predictions in an integrated fashion using a diverse repository of data & models. Our API allows users to remotely interact with the PEcAn servers and leverage the functionalities provided by the PEcAn Project. It has been designed to follow common RESTful API conventions. Most operations are performed using the HTTP methods: GET (retrieve) & POST (create). Please note that the PEcAn Project API is currently under active development and is possible that any information in this document is subject to change. "],["authentication.html", "16.2 Authentication", " 16.2 Authentication Authentication to the PEcAn API occurs via Basic HTTP Auth. The credentials for using the API are the same as those used to log into PEcAn & BetyDB. Here is how you use basic HTTP auth with curl: $ curl --user '<username>:<password>' <api-endpoint> Authentication also depends on the PEcAn server that the user interacts with. Some servers, at the time of deployment have the AUTH_REQ = FALSE, meaning that such servers do not require user autertication for the usage of the PEcAn APIs. Regardless of the type of server, the endpoints defind under General section can be accessed without any authentication. "],["restful-api-endpoints.html", "16.3 RESTful API Endpoints", " 16.3 RESTful API Endpoints This page contains the high-level overviews & the functionalities offered by the different RESTful endpoints of the PEcAn API. For the most up-to-date documentation, you can visit the PEcAn API Documentation. The currently implemented functionalities include: General: GET /api/ping: Ping the server to check if it is live GET /api/status: Obtain general information about PEcAn & the details of the database host Models: GET /api/models/: Search for model(s) using search pattern based on model name & revision GET /api/models/{model_id}: Fetch the details of specific model Sites: GET /api/sites/: Search for site(s) using search pattern based on site name GET /api/sites/{site_id}: Fetch the details of specific site PFTs: GET /api/pfts/: Search for PFT(s) using search pattern based on PFT name, PFT type & Model type GET /api/pfts/{pft_id}: Fetch the details of specific PFT Formats: GET /api/formats/: Search for format(s) using search pattern based on format name & mime type GET /api/formats/{format_id}: Fetch the details of specific Format Inputs: GET /api/inputs/: Search for inputs needed for a PEcAn workflow based on model_id, site_id, format_id & host_id. GET /api/inputs/{input_id} *: Download the desired input file (if the input_id corresponds to a folder, an optional filename argument must be provided) Workflows: GET /api/workflows/: Retrieve a list of PEcAn workflows POST /api/workflows/: Submit a new PEcAn workflow GET /api/workflows/{id}: Obtain the details of a particular PEcAn workflow by supplying its ID GET /api/workflows/{id}/status: Obtain the status of a particular PEcAn workflow GET /api/workflows/{id}/file/{filename}: Download the desired file from a PEcAn workflow Runs: GET /api/runs: Get the list of all the runs GET /api/runs/{run_id}: Fetch the details of a specified PEcAn run GET /api/runs/{run_id}/input/{filename}: Download the desired input file for a run GET /api/runs/{run_id}/output/{filename}: Download the desired output file for a run GET /api/runs/{run_id}/graph/{year}/{y_var}: Plot the graph of desired output variables for a run * indicates that the particular API is under development & may not be ready for use "],["examples.html", "16.4 Examples:", " 16.4 Examples: 16.4.1 Prerequisites to interact with the PEcAn API Server 16.4.1.1 R Packages httr jsonlite xml2 16.4.1.2 Python Packages requests json xml Following are some example snippets to call the PEcAn API endpoints: 16.4.2 GET /api/ping 16.4.2.1 R Snippet res <- httr::GET("http://pecan.localhost/api/ping") print(jsonlite::fromJSON(rawToChar(res$content))) ## $request ## [1] "ping" ## $response ## [1] "pong" 16.4.2.2 Python Snippet response = requests.get("http://pecan.localhost/api/ping") print(json.dumps(response.json(), indent=2)) ## { ## "request": "ping", ## "response": "pong" ## } 16.4.3 GET /api/status 16.4.3.1 R Snippet res <- httr::GET("http://pecan.localhost/api/status") print(jsonlite::fromJSON(rawToChar(res$content))) ## $pecan_details$version ## [1] "1.7.0" ## $pecan_details$branch ## [1] "develop" ## $pecan_details$gitsha1 ## [1] "unknown" ## $host_details$hostid ## [1] 99 ## $host_details$hostname ## [1] "" ## $host_details$start ## [1] 99000000000 ## $host_details$end ## [1] 99999999999 ## $host_details$sync_url ## [1] "" ## $host_details$sync_contact ## [1] "" 16.4.3.2 Python Snippet response = requests.get("http://pecan.localhost/api/status") print(json.dumps(response.json(), indent=2)) ## { ## "pecan_details": { ## "version": "1.7.0", ## "branch": "develop", ## "gitsha1": "unknown" ## }, ## "host_details": { ## "hostid": 99, ## "hostname": "", ## "start": 99000000000, ## "end": 99999999999, ## "sync_url": "", ## "sync_contact": "" ## } ## } 16.4.4 GET /api/models/ 16.4.4.1 R Snippet # Search model(s) with `model_name` containing "sip" & `revision` containing "ssr" res <- httr::GET( "http://pecan.localhost/api/models/?model_name=sip&revision=ssr&ignore_case=TRUE", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $models ## model_id model_name revision ## 1 1000000022 SIPNET ssr ## $count ## [1] 1 16.4.4.2 Python Snippet # Search model(s) with `model_name` containing "sip" & `revision` containing "ssr" response = requests.get( "http://pecan.localhost/api/models/?model_name=sip&revision=ssr&ignore_case=TRUE", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "models": [ ## { ## "model_id": "1000000022", ## "model_name": "SIPNET", ## "revision": "ssr" ## } ## ], ## "count": 1 ## } 16.4.5 GET /api/models/{model_id} 16.4.5.1 R Snippet # Fetch the details of PEcAn model with id = 1000000022 res <- httr::GET( "http://pecan.localhost/api/models/1000000022", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $modeltype_id ## [1] 3 ## $model_type ## [1] "SIPNET" ## $model_id ## [1] 1000000022 ## $model_name ## [1] "SIPNET" ## $revision ## [1] "ssr" ## $inputs ## input required ## 1 met TRUE ## 2 poolinitcond FALSE 16.4.5.2 Python Snippet # Fetch the details of PEcAn model with id = 1000000022 response = requests.get( "http://pecan.localhost/api/models/1000000022", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "model_id": "1000000022", ## "model_name": "SIPNET", ## "revision": "ssr", ## "modeltype_id": 3, ## "model_type": "SIPNET" ## "inputs": [ ## { ## "input": "met", ## "required": TRUE ## }, ## { ## "input": "poolinitcond", ## "required": FALSE ## } ## ] ## } 16.4.6 GET /api/sites/ 16.4.6.1 R Snippet # Search site(s) with `site_name` containing "willow" res <- httr::GET( "http://pecan.localhost/api/sites/?sitename=willow&ignore_case=TRUE", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $sites ## id sitename ## 1 676 Willow Creek (US-WCr) ## 2 1108 Willow Creek (WC)-Chequamegon National Forest ## 3 1202 Tully_willow ## 4 1223 Saare SRF willow plantation ## 5 1000005151 Willow Creek (US-WCr) ## $count ## [1] 5 16.4.6.2 Python Snippet # Search site(s) with `site_name` containing "willow" response = requests.get( "http://pecan.localhost/api/models/?sitename=willow&ignore_case=TRUE", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "sites": [ ## { ## "id": 676, ## "sitename": "Willow Creek (US-WCr)" ## }, ## { ## "id": 1108, ## "sitename": "Willow Creek (WC)-Chequamegon National Forest" ## }, ## { ## "id": 1202, ## "sitename": "Tully_willow" ## }, ## { ## "id": 1223, ## "sitename": "Saare SRF willow plantation" ## }, ## { ## "id": 1000005151, ## "sitename": "Willow Creek (US-WCr)" ## } ## ], ## "count": 5 ## } 16.4.7 GET /api/sites/{site_id} 16.4.7.1 R Snippet # Fetch the details of PEcAn site with id = 676 res <- httr::GET( "http://pecan.localhost/api/sites/676", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $id ## [1] 676 ## $city ## [1] "Park Falls Ranger District" ## $state ## [1] "Wisconsin" ## $country ## [1] "US" ## $mat ## [1] 4 ## $map ## [1] 815 ## $soil ## [1] "" ## $som ## [1] "NA" ## $notes ## [1] "MF" ## $soilnotes ## [1] "" ## $sitename ## [1] "Willow Creek (US-WCr)" ## $greenhouse [1] FALSE ## $sand_pct ## [1] 42.52 ## $clay_pct ## [1] 20.17 ## $time_zone ## [1] "America/Chicago" 16.4.7.2 Python Snippet # Fetch the details of PEcAn site with id = 676 response = requests.get( "http://pecan.localhost/api/sites/676", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "id": 676, ## "city": "Park Falls Ranger District", ## "state": "Wisconsin", ## "country": "US", ## "mat": 4, ## "map": 815, ## "soil": "", ## "notes": "MF", ## "soilnotes": "", ## "sitename": "Willow Creek (US-WCr)", ## "greenhouse": false, ## "sand_pct": 42.52, ## "clay_pct": 20.17, ## "time_zone": "America/Chicago" ## } 16.4.8 GET /api/pfts/ 16.4.8.1 R Snippet # Search pft(s) of "plant" type with `pft_name` containing "temperate" & belonging to `model_type` "SIPNET" res <- httr::GET( "http://pecan.localhost/api/pfts/?pft_name=temperate&pft_type=plant&model_type=sipnet&ignore_case=TRUE", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $pfts ## model_type pft_id pft_name pft_type ## <chr> <S3: integer64> <chr> <chr> ## 1 SIPNET 41 temperate.deciduous plant ## 2 SIPNET 1000000105 temperate.deciduous.IF plant ## 3 SIPNET 1000000107 temperate.deciduous_SDA plant ## 4 SIPNET 1000000115 temperate.deciduous.ALL plant ## 5 SIPNET 1000000118 temperate.deciduous.ALL.NORMAL plant ## 6 SIPNET 2000000017 tundra.deciduous.NGEE_Arctic plant ## 7 SIPNET 2000000045 temperate.broadleaf.deciduous plant ## $count ## [1] 7 16.4.8.2 Python Snippet # Search pft(s) of "plant" type with `pft_name` containing "temperate" & belonging to `model_type` "SIPNET" response = requests.get( "http://pecan.localhost/api/pfts/?pft_name=temperate&pft_type=plant&model_type=sipnet&ignore_case=TRUE", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "pfts": [ ## { ## "model_type": "SIPNET", ## "pft_id": 41, ## "pft_name": "temperate.deciduous", ## "pft_type": "plant" ## }, ## ... ## ], ## "count": 7 ## } 16.4.9 GET /api/pfts/{pft_id} 16.4.9.1 R Snippet # Fetch the details of PEcAn PFT with id = 2000000045 res <- httr::GET( "http://pecan.localhost/api/pfts/2000000045", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $model_type ## [1] "SIPNET" ## $pft_id ## [1] 2000000045 ## $pft_name ## [1] "temperate.broadleaf.deciduous" ## $definition ## [1] "SIPNET Temperate Deciduous PFT with priors on all parameters" ## $pft_type ## [1] "plant" 16.4.9.2 Python Snippet # Fetch the details of PEcAn PFT with id = 2000000045 response = requests.get( "http://pecan.localhost/api/pfts/2000000045", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "model_type": "SIPNET", ## "pft_id": 2000000045, ## "pft_name": "temperate.broadleaf.deciduous", ## "definition": "SIPNET Temperate Deciduous PFT with priors on all parameters", ## "pft_type": "plant" ## } 16.4.10 GET /api/formats/ 16.4.10.1 R Snippet # Search format(s) with name containing 'ameriflux' & mime type containing 'csv' res <- httr::GET( "http://pecan.localhost/api/formats/?format_name=ameriflux&mimetype=csv&ignore_case=TRUE", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $formats ## mimetype format_id format_name ## 1 text/csv 19 AmeriFlux.level4.h ## 2 text/csv 20 AmeriFlux.level4.d ## 3 text/csv 21 AmeriFlux.level4.w ## 4 text/csv 22 AmeriFlux.level4.m ## 5 text/csv 35 AmeriFlux.level2.h ## 6 text/csv 36 AmeriFlux.level3.h ## $count ## [1] 6 16.4.10.2 Python Snippet # # Search format(s) with name containing 'ameriflux' & mime type containing 'csv' response = requests.get( "http://pecan.localhost/api/formats/?format_name=ameriflux&mimetype=csv&ignore_case=TRUE", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "formats": [ ## { ## "mimetype": "text/csv", ## "format_id": 19, ## "format_name": "AmeriFlux.level4.h" ## }, ## { ## "mimetype": "text/csv", ## "format_id": 20, ## "format_name": "AmeriFlux.level4.d" ## }, ## ... ## ], ## "count": 6 ## } 16.4.11 GET /api/formats/{format_id} 16.4.11.1 R Snippet # Fetch the details of PEcAn format with id = 19 res <- httr::GET( "http://pecan.localhost/api/formats/19", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $mimetype ## [1] "text/csv" ## $format_id ## [1] 19 ## $name ## [1] "AmeriFlux.level4.h" ## $notes ## [1] "Half-hourly AmeriFlux level 4 gap filled, partitioned, and flagged flux tower data. Variables description: Level 4 data are obtained from the level 3 products, data are ustar filtered, gap-filled using different methods (ANN and MDS) and partitioned (i.e. NEE, GPP, and Re). Flags with information regarding quality of the original and gapfilled data are added. Missing values: -9999." ## $header ## [1] "" ## $format_variables ## description name unit ## 1 Latent heat flux LE_f W m-2 ## 2 Sensible heat flux H_f W m-2 ## 3 air temperature Ta_f degrees C ## 4 Vapor Pressure Deficit VPD_f Pa ## 5 Cumulative ecosystem respiration over a specified time step Reco umol C02 m-2 s-1 ## 6 Net ecosystem exchange NEE_st_fMDS umol C m-2 s-1 16.4.11.2 Python Snippet # Fetch the details of PEcAn format with id = 19 response = requests.get( "http://pecan.localhost/api/formats/19", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "mimetype": "text/csv", ## "format_id": 19, ## "name": "AmeriFlux.level4.h", ## "notes": "Half-hourly AmeriFlux level 4 gap filled, partitioned, and flagged flux tower data. Variables description: ## Level 4 data are obtained from the level 3 products, data are ustar filtered, gap-filled using different methods (ANN and ## MDS) and partitioned (i.e. NEE, GPP, and Re). Flags with information regarding quality of the original and gapfilled data ## are added. Missing values: -9999.", ## "header": "", ## "format_variables": [ ## { ## "description": "Latent heat flux", ## "name": "LE_f", ## "unit": "W m-2" ## }, ## ... ## ] ## } 16.4.12 GET /api/inputs/ 16.4.12.1 R Snippet # Get the inputs needed for a workflow with model_id = 1000000022 & site_id = 676 res <- httr::GET( "http://pecan.localhost/api/inputs/?model_id=1000000022&site_id=676", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $inputs ## sitename model_name revision tag hostname file_name format_name mimetype ## 1 Willow Creek (US-WCr) SIPNET ssr met ebi-forecast.igb.illinois.edu wcr.clim Sipnet.climna text/csv ## 2 Willow Creek (US-WCr) SIPNET ssr met docker wcr.clim Sipnet.climna text/csv ## file_path id input_name start_date end_date ## 1 /home/share/data/dbfiles/ 235 2000-01-01 06:00:00 2007-01-01 05:59:00 ## 2 /data/sites/willow 2000000001 1998-01-01 00:00:00 2006-12-31 00:00:00 ## $count ## [1] 2 # Get the inputs needed for a workflow with format_id = 5000000002 (AMERIFLUX_BASE_HH) & host_id = 99000000001 (docker) res <- httr::GET( "http://pecan.localhost/api/inputs/?format_id=5000000002&host_id=99000000001", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $inputs ## sitename mimetype format_name hostname file_name ## 1 Niwot Ridge Forest/LTER NWT1 (US-NR1) text/csv AMERIFLUX_BASE_HH docker AMF_US-NR1_BASE_HH_15-5 ## file_path id input_name start_date end_date ## 1 /data/dbfiles/AmerifluxLBL_site_0-772 1000011238 AmerifluxLBL_site_0-772 1998-01-01 2016-12-31 ## $count ## [1] 1 16.4.12.2 Python Snippet # Get the inputs needed for a workflow with model_id = 1000000022 & site_id = 676 response = requests.get( "http://pecan.localhost/api/inputs/?model_id=1000000022&site_id=676", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "inputs": [ ## { ## "sitename": "Willow Creek (US-WCr)", ## "model_name": "SIPNET", ## "revision": "ssr", ## "tag": "met", ## "hostname": "ebi-forecast.igb.illinois.edu", ## "file_name": "wcr.clim", ## "format_name": "Sipnet.climna", ## "mimetype": "text/csv", ## "file_path": "/home/share/data/dbfiles/", ## "id": 235, ## "input_name": "", ## "start_date": "2000-01-01 06:00:00", ## "end_date": "2007-01-01 05:59:00" ## }, ## ... ## ], ## "count": 2 ## } # Get the inputs needed for a workflow with format_id = 5000000002 (AMERIFLUX_BASE_HH) & host_id = 99000000001 (docker) response = requests.get( "http://pecan.localhost/api/inputs/?format_id=5000000002&host_id=99000000001", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "inputs": [ ## { ## "sitename": "Niwot Ridge Forest/LTER NWT1 (US-NR1)", ## "hostname": "docker", ## "file_name": "AMF_US-NR1_BASE_HH_15-5", ## "format_name": "AMERIFLUX_BASE_HH", ## "mimetype": "text/csv", ## "file_path": "/data/dbfiles/AmerifluxLBL_site_0-772", ## "id": 1000011238, ## "input_name": "AmerifluxLBL_site_0-772", ## "start_date": "1998-01-01", ## "end_date": "2016-12-31" ## } ## ], ## "count": 1, ## } 16.4.13 GET /api/inputs/{input_id} 16.4.13.1 R Snippet # Download the input file with id = 99000000003 res <- httr::GET( "http://pecan.localhost/api/inputs/99000000003", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.2002.nc") # Download the file 'fraction.plantation' from the input directory with id = 295 res <- httr::GET( "http://pecan.localhost/api/inputs/295?filename=fraction.plantation", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.fraction.plantation") 16.4.13.2 Python Snippet # Download the input file for with id = 99000000003 response = requests.get( "http://pecan.localhost/api/inputs/99000000003", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.2002.nc", "wb") as file: file.write(response.content) # Download the file 'fraction.plantation' from the input directory with id = 295 response = requests.get( "http://pecan.localhost/api/inputs/295?filename=fraction.plantation", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.2002.nc", "wb") as file: file.write(response.content) 16.4.14 GET /api/workflows/ 16.4.14.1 R Snippet # Get workflow(s) that use `model_id` = 1000000022 [SIPNET] & `site_id` = 676 [Willow Creek (US-WCr)] res <- httr::GET( "http://pecan.localhost/api/workflows/?model_id=1000000022&site_id=676", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $workflows ## id folder started_at site_id model_id hostname start_date end_date ## 1 1000009900 /fs/data2/output//PEcAn_1000009900 2018-11-09 08:56:37 676 1000000022 geo.bu.edu 2004-01-01 2004-12-31 ## 2 1000009172 /fs/data2/output//PEcAn_1000009172 2018-04-11 18:14:52 676 1000000022 test-pecan.bu.edu 2004-01-01 2004-12-31 ## ... ## $count ## [1] 5 16.4.14.2 Python Snippet # Get workflow(s) that use `model_id` = 1000000022 [SIPNET] & `site_id` = 676 [Willow Creek (US-WCr)] response = requests.get( "http://pecan.localhost/api/workflows/?model_id=1000000022&site_id=676", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "workflows": [ ## { ## "id": 1000009172, ## "folder": "/fs/data2/output//PEcAn_1000009900", ## "started_at": "2018-11-09 08:56:37", ## "site_id": 676, ## "model_id": 1000000022, ## "hostname": "geo.bu.edu", ## "start_date": "2004-01-01", ## "end_date": "2004-12-31" ## }, ## ... ## ], ## "count": 5 ## } 16.4.15 POST /api/workflows/ 16.4.15.1 R Snippet # Submit a workflow in XML format for execution xmlFile <- "pecan/tests/api.sipnet.xml" xml_string <- paste0(xml2::read_xml(xmlFile)) res <- httr::POST( "http://pecan.localhost/api/workflows/", httr::authenticate("carya", "illinois"), httr::content_type("application/xml"), body = xml_string ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $workflow_id ## [1] 99000000001 ## $status ## [1] "Submitted successfully" 16.4.15.2 Python Snippet # Submit a workflow in XML format for execution xml_file = "pecan/tests/api.sipnet.xml" root = xml.etree.ElementTree.parse(xml_file).getroot() response = requests.post( "http://pecan.localhost/api/workflows/", auth=HTTPBasicAuth('carya', 'illinois'), headers = {'Content-Type': 'application/xml'}, data = xml.etree.ElementTree.tostring(root, encoding='unicode', method='xml') ) print(json.dumps(response.json(), indent=2)) ## { ## "workflow_id": "99000000001", ## "status": "Submitted successfully" ## } 16.4.16 GET /api/workflows/{id} 16.4.16.1 R Snippet # Get details of workflow with `id` = '1000009172' res <- httr::GET( "http://pecan.localhost/api/workflows/1000009172", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $id ## [1] "1000009172" ## $folder ## [1] "/fs/data2/output//PEcAn_1000009172" ## $hostname ## [1] "test-pecan.bu.edu" ## $user_id ## [1] "NA" ## $properties ## $properties$end ## [1] "2004/12/31" ## $properties$pft ## $properties$pft[[1]] ## [1] "soil.IF" ## $properties$pft[[2]] ## [1] "temperate.deciduous.IF" ## $properties$email ## [1] "" ## $properties$notes ## [1] "" ## $properties$start ## [1] "2004/01/01" ## $properties$siteid ## [1] "676" ## $properties$modelid ## [1] "1000000022" ## $properties$hostname ## [1] "test-pecan.bu.edu" ## $properties$sitename ## [1] "WillowCreek(US-WCr)" ## $properties$input_met ## [1] "AmerifluxLBL.SIPNET" ## $properties$pecan_edit ## [1] "on" ## $properties$sitegroupid ## [1] "1000000022" ## $properties$fluxusername ## [1] "pecan" ## $properties$input_poolinitcond ## [1] "-1" 16.4.16.2 Python Snippet # Get details of workflow with `id` = '1000009172' response = requests.get( "http://pecan.localhost/api/workflows/1000009172", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "id": "1000009172", ## "folder": "/fs/data2/output//PEcAn_1000009172", ## "hostname": "test-pecan.bu.edu", ## "user_id": "NA", ## "properties": { ## "end": "2004/12/31", ## "pft": [ ## "soil.IF", ## "temperate.deciduous.IF" ## ], ## "email": "", ## "notes": "", ## "start": "2004/01/01", ## "siteid": "676", ## "modelid": "1000000022", ## "hostname": "test-pecan.bu.edu", ## "sitename": "WillowCreek(US-WCr)", ## "input_met": "AmerifluxLBL.SIPNET", ## "pecan_edit": "on", ## "sitegroupid": "1000000022", ## "fluxusername": "pecan", ## "input_poolinitcond": "-1" ## } ## } 16.4.17 GET /api/workflows/{id}/status 16.4.17.1 R Snippet # Get list of run belonging to the workflow with `workflow_id` = '99000000001' res <- httr::GET( "http://pecan.localhost/api/workflows/99000000001/status", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $workflow_id ## [1] "99000000001" ## $status ## [1] "TRAIT 2020-07-22 07:02:33 2020-07-22 07:02:35 DONE " ## [2] "META 2020-07-22 07:02:35 2020-07-22 07:02:38 DONE " ## [3] "CONFIG 2020-07-22 07:02:38 2020-07-22 07:02:40 DONE " ## [4] "MODEL 2020-07-22 07:02:40 2020-07-22 07:04:07 DONE " ## [5] "OUTPUT 2020-07-22 07:04:07 2020-07-22 07:04:08 DONE " ## [6] "ENSEMBLE 2020-07-22 07:04:08 2020-07-22 07:04:09 DONE " ## [7] "SENSITIVITY 2020-07-22 07:04:09 2020-07-22 07:04:16 DONE " ## [8] "FINISHED 2020-07-22 07:04:16 2020-07-22 07:04:16 DONE " 16.4.17.2 Python Snippet # Get list of run belonging to the workflow with `workflow_id` = '99000000001' response = requests.get( "http://pecan.localhost/api/workflows/99000000001/status", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "workflow_id": "99000000001", ## "status": [ ## "TRAIT 2020-07-22 07:02:33 2020-07-22 07:02:35 DONE ", ## "META 2020-07-22 07:02:35 2020-07-22 07:02:38 DONE ", ## "CONFIG 2020-07-22 07:02:38 2020-07-22 07:02:40 DONE ", ## "MODEL 2020-07-22 07:02:40 2020-07-22 07:04:07 DONE ", ## "OUTPUT 2020-07-22 07:04:07 2020-07-22 07:04:08 DONE ", ## "ENSEMBLE 2020-07-22 07:04:08 2020-07-22 07:04:09 DONE ", ## "SENSITIVITY 2020-07-22 07:04:09 2020-07-22 07:04:16 DONE ", ## "FINISHED 2020-07-22 07:04:16 2020-07-22 07:04:16 DONE " ## ] ## } 16.4.18 GET /api/workflows/{id}/file/{filename} 16.4.18.1 R Snippet # Download the 'ensemble.ts.99000000017.NPP.2002.2002.Rdata' output file for the workflow with id = 99000000031 res <- httr::GET( "http://pecan.localhost/api/workflows/99000000031/file/ensemble.ts.99000000017.NPP.2002.2002.Rdata", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.ensemble.ts.99000000017.NPP.2002.2002.Rdata") 16.4.18.2 Python Snippet # Download the 'ensemble.ts.99000000017.NPP.2002.2002.Rdata' output file for the workflow with id = 99000000031 response = requests.get( "http://pecan.localhost/api/workflows/99000000031/file/ensemble.ts.99000000017.NPP.2002.2002.Rdata", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.ensemble.ts.99000000017.NPP.2002.2002.Rdata", "wb") as file: file.write(response.content) 16.4.19 GET /api/runs/ 16.4.19.1 R Snippet # Get list of run belonging to the workflow with `workflow_id` = '1000009172' res <- httr::GET( "http://pecan.localhost/api/runs/?workflow_id=1000009172", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $runs ## runtype ensemble_id workflow_id id model_id site_id parameter_list start_time ## finish_time ## 1 ensemble 1000017624 1000009172 1002042201 1000000022 796 ensemble=1 2005-01-01 ## 00:00:00 2011-12-31 00:00:00 ## ... ## $count ## [1] 50 16.4.19.2 Python Snippet # Get list of run belonging to the workflow with `workflow_id` = '1000009172' response = requests.get( "http://pecan.localhost/api/runs/?workflow_id=1000009172", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "runs": [ ## { ## "runtype": "ensemble", ## "ensemble_id": 1000017624, ## "workflow_id": 1000009172, ## "id": 1002042201, ## "model_id": 1000000022, ## "site_id": 796, ## "parameter_list": "ensemble=1", ## "start_time": "2005-01-01", ## "finish_time": "2011-12-31" ## }, ## ... ## ] ## "count": 50, ## "next_page": "http://pecan.localhost/api/workflows/?workflow_id=1000009172&offset=50&limit=50" ## } 16.4.20 GET /api/runs/{run_id} 16.4.20.1 R Snippet # Get details of run belonging with `id` = '99000000282' res <- httr::GET( "http://pecan.localhost/api/runs/99000000282", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $runtype ## [1] "sensitivity analysis" ## $ensemble_id ## [1] 99000000016 ## $workflow_id ## [1] 99000000031 ## $id ## [1] 99000000282 ## $model_id ## [1] 1000000014 ## $site_id ## [1] 772 ## $start_time ## [1] "2002-01-01" ## $finish_time ## [1] "2002-12-31" ## $parameter_list ## [1] "quantile=MEDIAN,trait=all,pft=temperate.coniferous" ## $started_at ## [1] "2020-07-22 07:02:40" ## $finished_at ## [1] "2020-07-22 07:02:57" ## $inputs ## $inputs$info ## [1] "README.txt" ## $inputs$others ## [1] "sipnet.clim" "sipnet.in" "sipnet.param" "sipnet.param-spatial" ## $outputs ## $outputs$logfile ## [1] "logfile.txt" ## $outputs$info ## [1] "README.txt" ## $outputs$years ## $outputs$years$`2002` ## $outputs$years$`2002`$data ## [1] "2002.nc" ## $outputs$years$`2002`$variables ## $outputs$years$`2002`$variables$GPP ## [1] "Gross Primary Productivity" ## $outputs$years$`2002`$variables$NPP ## [1] "Net Primary Productivity" ## ... 16.4.20.2 Python Snippet # Get details of run with `id` = '1002042201' response = requests.get( "http://pecan.localhost/api/runs/1002042201", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "runtype": "ensemble", ## "ensemble_id": 1000017624, ## "workflow_id": 1000009172, ## "id": 1002042201, ## "model_id": 1000000022, ## "site_id": 796, ## "parameter_list": "ensemble=1", ## "start_time": "2005-01-01", ## "finish_time": "2011-12-31", ## "inputs": { ## "info": "README.txt", ## "others": [ ## "sipnet.clim", ## "sipnet.in", ## "sipnet.param", ## "sipnet.param-spatial" ## ] ## } ## "outputs": { ## "logfile": "logfile.txt", ## "info": "README.txt", ## "years": { ## "2002": { ## "data": "2002.nc", ## "variables": { ## "GPP": "Gross Primary Productivity", ## "NPP": "Net Primary Productivity", ## ... ## } ## } ## } ## } ## } 16.4.21 GET /api/runs/{run_id}/input/{filename} 16.4.21.1 R Snippet # Download the 'sipnet.in' input file for the run with id = 99000000282 res <- httr::GET( "http://pecan.localhost/api/runs/99000000282/input/sipnet.in", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.sipnet.in") 16.4.21.2 Python Snippet # Download the 'sipnet.in' input file for the run with id = 99000000282 response = requests.get( "http://pecan.localhost/api/runs/99000000282/input/sipnet.in", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.sipnet.in", "wb") as file: file.write(response.content) 16.4.22 GET /api/runs/{run_id}/output/{filename} 16.4.22.1 R Snippet # Download the '2002.nc' output file for the run with id = 99000000282 res <- httr::GET( "http://pecan.localhost/api/runs/99000000282/output/2002.nc", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.2002.nc") 16.4.22.2 Python Snippet # Download the '2002.nc' output file for the run with id = 99000000282 response = requests.get( "http://pecan.localhost/api/runs/99000000282/output/2002.nc", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.2002.nc", "wb") as file: file.write(response.content) 16.4.23 GET /api/runs/{run_id}/graph/{year}/{y_var} 16.4.23.1 R Snippet # Plot the Gross Primary Productivity vs Time for the run with ID `99000000282` for the year 2002 res <- httr::GET( "http://pecan.localhost/api/runs/99000000282/graph/2002/GPP", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.png") 16.4.23.2 Python Snippet # Plot the Gross Primary Productivity vs Time for the run with ID `99000000282` for the year 2002 response = requests.get( "http://pecan.localhost/api/runs/99000000282/graph/2002/GPP", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.png", "wb") as file: file.write(response.content) "],["database-sync.html", "17 Database synchronization", " 17 Database synchronization The database synchronization consists of 2 parts: - Getting the data from the remote servers to your server - Sharing your data with everybody else "],["how-does-it-work.html", "17.1 How does it work?", " 17.1 How does it work? Each server that runs the BETY database will have a unique machine_id and a sequence of ID’s associated. Whenever the user creates a new row in BETY it will receive an ID in the sequence. This allows us to uniquely identify where a row came from. This is information is crucial for the code that works with the synchronization since we can now copy those rows that have an ID in the sequence specified. If you have not asked for a unique ID your ID will be 99. The synchronization code itself is split into two parts, loading data with the load.bety.sh script and exporting data using dump.bety.sh. If you do not plan to share data, you only need to use load.bety.sh to update your database. "],["set-up.html", "17.2 Set up", " 17.2 Set up Requests for new machine ID’s is currently handled manually. To request a machine ID contact Rob Kooper kooper@illinois.edu. In the examples below this ID is referred to as ‘my siteid’. To setup the database to use this ID you need to call load.bety in ‘CREATE’ mode (replacing with the ID if your site) sudo -u postgres {$PECAN}/scripts/load.bety.sh -c -u -m <my siteid> WARNING: At the moment running CREATE deletes all current records in the database. If you are running from the VM this includes both all runs you have done and all information that the database is prepopulated with (e.g. input and model records). Remote records can be fetched (see below), but local records will be lost (we’re working on improving this!) "],["fetch-latest-data.html", "17.3 Fetch latest data", " 17.3 Fetch latest data When logged into the machine you can fetch the latest data using the load.bety.sh script. The script will check what site you want to get the data for and will remove all data in the database associated with that id. It will then reinsert all the data from the remote database. The script is configured using environment variables. The following variables are recognized: - DATABASE: the database where the script should write the results. The default is bety. - OWNER: the owner of the database (if it is to be created). The default is bety. - PG_OPT: additional options to be added to psql (default is nothing). - MYSITE: the (numerical) ID of your site. If you have not requested an ID, use 99; this is used for all sites that do not want to share their data (i.e. VM). 99 is in fact the default. - REMOTESITE: the ID of the site you want to fetch the data from. The default is 0 (EBI). - CREATE: If ‘YES’, this indicates that the existing database (bety, or the one specified by DATABASE) should be removed. Set to YES (in caps) to remove the database. THIS WILL REMOVE ALL DATA in DATABASE. The default is NO. - KEEPTMP: indicates whether the downloaded file should be preserved. Set to YES (in caps) to keep downloaded files; the default is NO. - USERS: determines if default users should be created. Set to YES (in caps) to create default users with default passwords. The default is NO. All of these variables can be specified as command line arguments as well, to see the options use -h. load.bety.sh -h ./scripts/load.bety.sh [-c YES|NO] [-d database] [-h] [-m my siteid] [-o owner] [-p psql options] [-r remote siteid] [-t YES|NO] [-u YES|NO] -c create database, THIS WILL ERASE THE CURRENT DATABASE, default is NO -d database, default is bety -h this help page -m site id, default is 99 (VM) -o owner of the database, default is bety -p additional psql command line options, default is empty -r remote site id, default is 0 (EBI) -t keep temp folder, default is NO -u create carya users, this will create some default users dump.bety.sh -h ./scripts/dump.bety.sh [-a YES|NO] [-d database] [-h] [-l 0,1,2,3,4] [-m my siteid] [-o folder] [-p psql options] [-u YES|NO] -a use anonymous user, default is YES -d database, default is bety -h this help page -l level of data that can be dumped, default is 3 -m site id, default is 99 (VM) -o output folder where dumped data is written, default is dump -p additional psql command line options, default is -U bety -u should unchecked data be dumped, default is NO "],["sharing-data.html", "17.4 Sharing data", " 17.4 Sharing data Sharing your data requires a few steps. First, before entering any data, you will need to request an ID from the PEcAn developers. Simply open an issue at github and we will generate an ID for you. If possible, add the URL of your data host. You will now need to synchronize the database again and use your ID. For example if you are given ID=42 you can use the following command: MYID=42 REMOTEID=0 ./scripts/load.bety.sh. This will load the EBI database and set the ID’s such that any data you insert will have the right ID. To share your data you can now run the dump.bey.sh. The script is configured using environment variables, the following variables are recognized: - DATABASE: the database where the script should write the results. The default is bety. - PG_OPT: additional options to be added to psql (default is nothing). - MYSITE: the ID of your site. If you have not requested an ID, use 99, which is used for all sites that do not want to share their data (i.e. VM). 99 is the default. - LEVEL: the minimum access-protection level of the data to be dumped (0=private, 1=restricted, 2=internal collaborators, 3=external collaborators, 4=public). The default level for exported data is level 3. - note that currently only the traits and yields tables have restrictions on sharing. If you share data, records from other (meta-data) tables will be shared. If you wish to extend the access_level to other tables please submit a feature request. - UNCHECKED: specifies whether unchecked traits and yields be dumped. Set to YES (all caps) to dump unchecked data. The default is NO. - ANONYMOUS: specifies whether all users be anonymized. Set to YES (all caps) to keep the original users (INCLUDING PASSWORD) in the dump file. The default is NO. - OUTPUT: the location of where on disk to write the result file. The default is ${PWD}/dump. NOTE: If you want your dumps to be accessible to other PEcAn servers you need to perform the following additional steps Open pecan/scripts/load.bety.sh In the DUMPURL section of the code add a new record indicating where you are dumping your data. Below is the example for SITE number 1 (Boston University) elif [ "${REMOTESITE}" == "1" ]; then DUMPURL="http://psql-pecan.bu.edu/sync/dump/bety.tar.gz" Check your Apache settings to make sure this location is public Commit this code and submit a Pull Request From the URL in the Pull Request, PEcAn administrators will update the machines table, the status map, and notify other users to update their cron jobs (see Automation below) Plans to simplify this process are in the works "],["automation.html", "17.5 Automation", " 17.5 Automation Below is an example of a script to synchronize PEcAn database instances across the network. db.sync.sh #!/bin/bash ## make sure psql is in PATH export PATH=/usr/pgsql-9.3/bin/:$PATH ## move to export directory cd /fs/data3/sync ## Dump Data MYSITE=1 /home/dietze/pecan/scripts/dump.bety.sh ## Load Data from other sites MYSITE=1 REMOTESITE=2 /home/dietze/pecan/scripts/load.bety.sh MYSITE=1 REMOTESITE=5 /home/dietze/pecan/scripts/load.bety.sh MYSITE=1 REMOTESITE=0 /home/dietze/pecan/scripts/load.bety.sh ## Timestamp sync log echo $(date +%c) >> /home/dietze/db.sync.log Typically such a script is set up to run as a cron job. Make sure to schedule this job (crontab -e) as the user that has database privileges (typically postgres). The example below is a cron table that runs the sync every hour at 12 min after the hour. MAILTO=user@yourUniversity.edu 12 * * * * /home/dietze/db.sync.sh "],["database-maintentance.html", "17.6 Database maintentance", " 17.6 Database maintentance All databases need maintenance performed on them. Depending upon the database type this can happen automatically, or it needs to be run through a scheduler or manually. The BETYdb database is Postgresql and it needs to be reindexed and vacuumed on a regular basis. Reindexing introduces efficiencies back into the database by reorganizing the indexes. Vacuuming the database frees up resources to the database by rearranging and compacting the database. Both of these operations are necessary and safe. As always if there’s a concern, a backup of the database should be made ahead of time. While running the reindexing and vacuuming commands, users will notice a slowdown at times. Therefore it’s better to run these maintenance tasks during off hours. 17.6.1 Reindexing the database As mentioned above, reindexing allows the database to become more efficient. Over time as data gets updated and deleted, the indexes become less efficient. This has a negative inpact on executed statements. Reindexing makes the indexes efficient again (at least for a while) allowing faster statement execution and reducing the overall load on the database. The reindex.bety.sh script is provided to simplify reindexing the database. reindex.bety.sh -h ./reindex.bety.sh [-c datalog] [-d database] [-h] [-i table names] [-p psql options] [-q] [-s] [-t tablename] -c catalog, database catalog name used to search for tables, default is bety -d database, default is bety -h this help page -i table names, list of space-separated table names to skip over when reindexing -p additional psql command line options, default is -U bety -q the reindexing should be quiet -s reindex the database after reindexing the tables (this should be done sparingly) -t tablename, the name of the one table to reindex If the database is small enough it’s reasonable to reindex the entire database at one time. To do this manually run or schedule the REINDEX statement. For example: reindex.bety.sh -s For larger databases it may be desireable to reindex entire tables at a time. An efficient way to do this is to reindex the larger tables and then the entire database. For example: reindex.bety.sh -t traits; reindex.bety.sh -t yields; reindex.bety.sh -s For very large databases it may be desirable to reindex one or more individual indexes before reindexing tables and the databases. In this case running specific psql commands to reindex those specific indexes, followed by reindexing the table is a possible approach. For example: psql -U bety -c "REINDEX INDEX index_yields_on_citation_id; REINDEX INDEX index_yields_on_cultivar_id;" reindex.bety.sh -t yields; Splitting up the indexing commands over time allows the database to operate efficiently with minimal impact on users. One approach is to schedule the reindexing of large, complex tables at a spcific off-time during the week, followed by a general reindexing and excluding those large tables on a weekend night. Please refere to the Automation section above for information on using cron to schedule reindexing commands. 17.6.2 Vacuuming the database Vacuuming the BETYdb Postgresql database reduces the amount of resources it uses and introduces its own efficiencies. Over time, modified and deleted records leave ‘holes’ in the storage of the database. This is a common feature for most databases. Each database has its own way of handing this, in Postgresql it’s the VACUUM command. The VACUUM command performs two main operations: cleaning up tables to make memory use more efficient, and analyze tables for optimum statement execution. The use of the keyword ANALYZE indicates the second operation should take place. The vacuum.bety.sh script is provided to simplify vacuuming the database. vacuum.bety.db -h ./vacuum.bety.sh [-c datalog] [-d database] [-f] [-h] [-i table names] [-n] [-p psql options] [-q] [-s] [-t tablename] [-z] -c catalog, database catalog name used to search for tables, default is bety -d database, default is bety -f perform a full vacuum to return resources to the system. Specify rarely, if ever -h this help page -i table names, list of space-separated table names to skip over when vacuuming -n only vacuum the tables and do not analyze, default is to first vacuum and then analyze -p additional psql command line options, default is -U bety -q the export should be quiet -s skip vacuuming the database after vacuuming the tables -t tablename, the name of the one table to vacuum -z only perform analyze, do not perform a regular vacuum, overrides -n and -f, sets -s For small databases with light loads it may be possible to set aside a time for a complete vacuum. During this time, commands executed against the database might fail (a temporary condition as the database gets cleaned up). The following commands can be used to perform all the vaccum operations in one go. vacuum.bety.sh -f Generally it’s not desireable to have down time. If the system running the database doesn’t need resources that the database is using returned to it, a FULL vacuum can be avoided. This is the default behavior of the script vacuum.bety.sh In larger databases, vacuuming the entire database can take a long time causing a negative impact on users. This means that individual tables need to be vacuumed. How often a vacuum needs to be performed is dependent upon a table’s activity. The more frequently updates and deletes occur on a table, the more frequent the vaccum should be. For large tables it may be desireable to separate the table cleanup from the analysis. An example for completely vacuuming and analyzing a table is: psql -U bety -c "VACUUM traits; VACUUM ANALYZE traits;" Similar to indexes, vacuuming the most active tables followed by general database vacuuming and vacuum analyze may be a desireable approach. Also note that it isn’t necessary to run VACUUM ANALYZE for each vacuum performed. Separating the commands and performing a VACUUM ANALYZE after several regular vacuums may be sufficient, with less load on the database. If the BETYdb database is running on a system with limited resources, or with resources that have become limited, the VACCUM command can return resources to the system from the database. The normal vacuuming process releases resources back to the database for reuse, but not to the system; generally this isn’t a problem. Postgresql has a VACUUM keyword FULL that returns resources back to the system. Requesting a FULL vacuum will lock the table being vacuumed while it is being re-written preventing any statements from being executed against it. If performing VECUUM FULL against the entire database, only the table being actively worked on is locked. To minimize the impact a VACUUM FULL has on users, it’s best to perform a normal vacuum before a FULL vacuum. If this approach is taken, there sould be a minimal time gap between the normal VACUUM and the VACUUM FULL commands. A normal vacuum allows changes to be made thus requiring the full vacuum to handle those changes, extending it’s run time. Reducing the time between the two commands lessens the work VACUUM FULL needs to do. psql -U bety -c "VACUUM yields; VACUUM FULL yields; VACUUM ANALYZE yields;" Give its impact, it’s typically not desireable to perform a VACUUM FULL after every normal vacuum; it should be done on an “as needed” basis or infrequently. "],["troubleshooting-1.html", "17.7 Troubleshooting", " 17.7 Troubleshooting There are several possibilities if a scheduled cron job apepars to be running but isn’t producing the expected results. The following are suggestions on what to try to resolve the issue. 17.7.1 Username and password The user that scheduled a cron job may not have access permissions to the database. This can be easily confirmed by running the command line from the cron job while logged in as the user that scheduled the job. An error message will be shown if the user doesn’t have permissions. To resolve this, be sure to include a valid database user (not a BETYdb user) with their credentials on the command in crontab. 17.7.2 db_hba.conf file Iit’s possible that the machine hosting the docker image of the database doesn’t have permissions to access the database. This may due to the cron job running on a machine that is not the docker instance of the database. It may be necessary to look at the loga on the hosting machine to determine if database access permissions are causing a problem. Logs are stored in different locations depending upon the Operating System of the host and upon other environmental factors. This document doesn’t provide information on where to find the logs. To begin, it’s best to look at the contents of the relevent database configuration file. The following command will display the contents of the db_hba.conf file. psql -U postgres -qAt -c "show hba_file" | xargs grep -v -E '^[[:space:]]*#' This command should return a series of text lines. For each row except those begining with ‘local’, the fourth item describes the machines that can access the database. In some cases an IP mask is specified in the fifth that further restricts the machines that have access. The special work ‘all’ in the fourth column grants permissions to all machines. The last column on each line contains the authentication option for the machine(s) specified in the fourth column (with a possible fifth column IP mask modifier). Ensure that the host machine is listed under the fourth column (machine addresse range, or ‘all’), is also included in the IP mask if one was specified, and finally that any authentication option are not set to ‘reject’. If the host machine is not included the db_hba.conf file will need to be updated to allow access. "],["network-status-map.html", "17.8 Network Status Map", " 17.8 Network Status Map https://pecan2.bu.edu/pecan/status.php Nodes: red = down, yellow = out-of-date schema, green = good Edges: red = fail, yellow = out-of-date sync, green = good "],["tasks.html", "17.9 Tasks", " 17.9 Tasks Following is a list of tasks we plan on working on to improve these scripts: - pecanproject/bety#368 allow site-specific customization of information and UI elements including title, contacts, logo, color scheme. "],["standalone-tools-modules.html", "18 Standalone tools (modules)", " 18 Standalone tools (modules) Radiative transfer modeling and remote sensing (modules/rtm); vignette Photosynthesis (modules/photosynthesis); vignette Allometry (modules/allometry); vignette Load data (modules/benchmark – PEcAn.benchmark::load_data) "],["LoadData.html", "18.1 Loading Data in PEcAn", " 18.1 Loading Data in PEcAn If you are loading data in to PEcAn for benchmarking, using the Benchmarking shiny app [provide link?] is recommended. Data can be loaded manually using the load_data function which in turn requires providing data format information using query.format.vars and the path to the data using query.file.path. Below is a description of the load_data function an a simple example of loading data manually. 18.1.1 Inputs Required data.path: path to the data that is the output of the function query.file.path (see example below) format: R list object that is the output of the function query.format.vars (see example below) Optional start_year = NA: end_year = NA: site = NA vars.used.index=NULL 18.1.2 Output R data frame containing the requested variables converted in to PEcAn standard name and units and time steps in POSIX format. 18.1.3 Example The data for this example has already been entered in to the database. To add new data go to new data documentation. To load the Ameriflux data for the Harvard Forest (US-Ha1) site. Create a connection to the BETY database. This can be done using R function bety = PEcAn.DB::betyConnect(php.config = "pecan/web/config.php") where the complete path to the config.php is specified. See here for an example config.php file. Look up the inputs record for the data in BETY. To find the input ID, either look at The url of the record (see image above) In R run library(dplyr) input_name = "AmerifluxLBL_site_0-758" #copied directly from online input.id = tbl(bety,"inputs") %>% filter(name == input_name) %>% pull(id) Additional arguments to query.format.vars are optional If you only want to load a subset of dates in the data, specify start and end year, otherwise all data will be loaded. If you only want to load a select list of variables from the data, look up their IDs in BETY, otherwise all variables will be loaded. In R run format = PEcAn.DB::query.format.vars(bety, input.id) Examine the resulting R list object to make sure it returned the correct information. The example format contains the following objects: $file_name [1] "AMERIFLUX_BASE_HH" $mimetype [1] "csv" $skip [1] 2 $header [1] 1 $na.strings [1] "-9999" "-6999" "9999" "NA" $time.row [1] 4 $site [1] 758 $lat [1] 42.5378 $lon [1] -72.1715 $time_zone [1] "America/New_York" The first 4 rows of the table format$vars looks like this: bety_name variable_id input_name input_units storage_type column_number bety_units mstmip_name mstmip_units pecan_name pecan_units air_pressure 554 PA kPa 19 Pa Psurf Pa Psurf Pa airT 86 TA celsius 4 degrees C Tair K Tair K co2atm 135 CO2_1 umol mol-1 20 umol mol-1 CO2air micromol mol-1 CO2air micromol mol-1 datetime 5000000001 TIMESTAMP_START ymd_hms %Y%m%d%H%M 1 ymd_hms NA NA datetime ymd_hms Get the path to the data data.path = PEcAn.DB::query.file.path( input.id = input.id, host_name = PEcAn.remote::fqdn(), con = bety) Load the data data = PEcAn.benchmark::load_data(data.path = data.path, format = format) "],["remote-data-module.html", "18.2 Remote data module", " 18.2 Remote data module Remote data module retrieves remote sensing data from MODISTools, Google Earth Engine and AppEEARS, as well as from the NASA’s Land Processes Distributed Active Archive Center (LP DAAC) data server. For currently available R functions see here, and for the Python functions see RpTools. The downloaded data can be used while performing further analyses in PEcAn. 18.2.0.1 Google Earth Engine Google Earth Engine is a cloud-based platform for performing analysis on satellite data. It provides access to a large data catalog through an online JavaScript code editor and a Python API. Datasets currently available for use in PEcAn via Google Earth Engine are, Sentinel-2 MSI gee2pecan_s2(). It is possible to estimate Leaf Area Index (LAI) from Sentinel-2 data using the ESA SNAP algorithm. SMAP Global Soil Moisture Data gee2pecan_smap() Landsat 8 Surface Reflectance gee2pecan_l8() Global Forest Canopy Height, 2019 gee2pecan_gedi() 18.2.0.2 AppEEARS AppEEARS (Application for Extracting and Exploring Analysis Ready Samples) is an online tool which provides an easy to use interface for downloading analysis ready remote sensing data. Products available on AppEEARS. Note: AppEEARS uses a task based system for processing the data request, it is possible for a task to run for long hours before it gets completed. The module checks the task status after every 60 seconds and saves the files when the task gets completed. 18.2.0.3 LP DAAC Data Pool LP DAAC (Land Processes Distributed Active Archive Center) Data Pool is a NASA Earthdata login-enabled server located at the USGS Earth Resources Observation and Science (EROS) Center that archives and distributes land data products. Similar to AppEEARS, using this source also requires an Earthdata account (see below). Currently this pipeline is implemented and tested only for the GEDI dataset. For more information about the data you are downloading, including documentation and how to properly cite the data, please visit https://lpdaac.usgs.gov/. 18.2.0.4 Set-Up instructions (first time and one time only): Sign up for the Google Earth Engine. Follow the instructions here to sign up for using GEE. You need to have your own GEE account for using the GEE download functions in this module. Sign up for NASA Earthdata. Using AppEEARS and LP DAAC Data Pool requires an Earthdata account visit this page to create your own account. Install the RpTools package. Python codes required by this module are stored in a Python package named “RpTools” using this requires Python3 and the package manager pip3 to be installed in your system. To install the package, Navigate to pecan/modules/data.remote/inst/RpTools If you are inside the pecan directory, this can be done by, cd modules/data.remote/inst/RpTools Use pip3 to install the package. “-e” flag is used to install the package in an editable or develop mode, so that changes made to the code get updated in the package without reinstalling. pip3 install -e . Authenticate GEE. The GEE API needs to be authenticated using your credentials. The credentials will be stored locally on your system. This can be done by, #this will open a browser and ask you to sign in with the Google account registered for GEE earthengine authenticate Alternate way, python3 import ee ee.Authenticate() Save the Earthdata credentials. If you wish to use AppEEARS or LP DAAC data pool you will have to store your username and password inside a JSON file and then pass its file path as an argument in remote_process 18.2.0.5 Usage guide: This module is accesible using the R function remote_process which uses the Python package “RpTools” (located at data.remote/inst/RpTools) for downloading and processing data. RpTools has a function named rp_control which controls two other functions, get_remote_data which controls the scripts which are used for downloading data from the source. For example, gee2pecan_s2 downloads bands from Sentinel 2 using GEE. process_remote_data which controls the scripts responsible for processing the raw data. For example, bands2lai_snap uses the downloaded bands to compute LAI using the SNAP algorithm. Workflow of the module 18.2.1 Configuring remote_process remote_process is configured using remote data tags in the pecan.xml. The required tags are described below, <remotedata> <out_get_data>...</out_get_data> <source>...</source> <collection>...</collection> <scale>...</scale> <projection>...</projection> <qc>...</qc> <algorithm>...</algorithm> <credfile>...</credfile> <out_process_data>...</out_process_data> <overwrite>...</overwrite> </remotedata> out_get_data: (required) type of raw output requested, e.g, bands, smap source: (required) source of remote data, e.g., gee or appeears collection: (required) dataset or product name as it is provided on the source, e.g. “COPERNICUS/S2_SR” for gee or “SPL3SMP_E.003” for appeears scale: (optional) pixel resolution required for some gee collections, recommended to use 10 for Sentinel 2 scale Information about how GEE handles scale can be found out here projection: (optional) type of projection. Only required for appeears polygon AOI type qc: (optional) quality control parameter, required for some gee collections overwrite: (optional) if TRUE database checks will be skipped and existing data of same type will be replaced entirely. When processed data is requested, the raw data required for creating it will also be replaced. By default FALSE If you don’t want to enter your Earthdata credentials everytime you use AppEEARS or LP DAAC use the following tag too: credfile: (optional) absolute path to JSON file containing Earthdata username and password, only required when using AppEEARS and LP DAAC data pool. The contents of this file could be as simple as the following: { "username": "yourEARTHDATAusername", "password": "yourEARTHDATApassword" } These tags are only required if processed data (i.e. further processing after downloading the data) is requested: out_process_data: (optional) type of processed output requested, e.g, lai algorithm: (optional) algorithm used for processing data, currently only SNAP is implemented to estimate LAI from Sentinel-2 bands Additional information are taken from the registration files located at pecan/modules/data.remote/inst/registration, each source has its own registration file. This is so because there isn’t a standardized way to retrieve all image collections from GEE and each image collection may require its own way of performing quality checks, etc whereas all of the products available on AppEEARS can be retrieved using its API in a standardized way. GEE registration file (register.GEE.xml) : collection original_name original name of the image collection, e.g. COPERNICUS/S2_SR pecan_name short form of original name using which the collection is represented in PEcAn, e.g. s2 coord coord_type coordinate type supported by the collection scale the default value of the scale can be specified here qc the default value of the qc parameter can be specified here raw_format format details of the raw file id id of the format name name of the format mimetype MIME type pro_format format details of the processed file when the collection is used to create a processed file id id of the format name name of the format mimetype MIME type AppEEARS and LP DAAC Data Pool registration files (register.APPEEARS.xml and register.LPDAACDATAPOOL.xml) : coord coord_type coordinate type supported by the product raw_format format details of the output file id id of the format name name of the format mimetype MIME type Remaining input data: start date, end date: these are taken from the run tag in pecan.xml outdir: from the outdir tag in pecan.xml Area of interest: the coordinates and site name are found out from BETY using siteid present in the run tag. These are then used to create a GeoJSON file which is used by the download functions. The output data from the module are returned in the following tags: raw_id: input id of the raw file raw_path: absolute path to the raw file pro_id: input id of the processed file pro_path: absolute path to the processed file Output files: The output files are of netCDF type and are stored in a directory inside the specified outdir with the following naming convention: source_site_siteid The output files are created with the following naming convention: source_collection_scale_projection_qc_site_siteid_TimeStampOfFileCreation Whenever a data product is requested the output files are stored in the inputs table of BETYdb. Subsequently when the same product is requested again with a different date range but with the same qc, scale, projection the previous file in the db would be extended. The DB would always contain only one file of the same type. As an example, if a file containing Sentinel 2 bands for start date: 2018-01-01, end date: 2018-06-30 exists in the DB and the same product is requested again for a different date range one of the following cases would happen, New dates are ahead of the existing file: For example, if the requested dates are start: 2018-10-01, end: 2018-12-31 in this case the previous file will be extended forward meaning the effective start date of the file to be downloaded would be the day after the end date of the previous file record, i.e. 2018-07-01. The new and the previous file would be merged and the DB would now be having data for 2018-01-01 to 2018-12-31. New dates are preceding of the existing file: For example, if the requested dates are start: 2017-01-01, end: 2017-06-30 in this case the effective end date of the new download would be the day before the start date of the existing file, i.e., 2017-12-31. The new and the previous file would be merged and the file in the DB would now be having data for 2017-01-01 to 2018-06-30. New dates contain the date range of the existing file: For example, if the requested dates are start: 2016-01-01, end: 2019-06-30 here the existing file would be replaced entirely with the new file. A more efficient way of doing this could be to divide your request into two parts, i.e, first request for 2016-01-01 to 2018-01-01 and then for 2018-06-30 to 2019-06-30. When a processed data product such as SNAP-LAI is requested, the raw product (here Sentinel 2 bands) used to create it would also be stored in the DB. If the raw product required for creating the processed product already exists for the requested time period, the processed product would be created for the entire time period of the raw file. For example, if Sentinel 2 bands are present in the DB for 2017-01-01 to 2017-12-31 and SNAP-LAI is requested for 2017-03-01 to 2017-07-31, the output file would be containing LAI for 2017-01-01 to 2017-12-31. 18.2.1.1 Creating Polygon based sites A polygon site can be created in the BETYdb using the following way, PEcAn.DB::db.query("insert into sites (country, sitename, geometry) values ('country_name', 'site_name', ST_SetSRID(ST_MakePolygon(ST_GeomFromText('LINESTRING(lon lat elevation)')), crs));", con) Example, db.query("insert into sites (country, sitename, geometry) values ('FI', 'Qvidja_ca6cm', ST_SetSRID(ST_MakePolygon(ST_GeomFromText('LINESTRING(22.388957339620813 60.287395608412218 14.503780364990234, 22.389600591651835 60.287182336733203 14.503780364990234, 22.38705422266651 60.285516177775868 14.503780364990234, 22.386575219445195 60.285763643883932 14.503780364990234, 22.388957339620813 60.287395608412218 14.503780364990234 )')), 4326));", con) 18.2.1.2 Example use (GEE) This example will download Sentinel 2 bands and then use the SNAP algorithm to compute Leaf Area Index. Add remotedata tag to pecan.xml and configure it. <remotedata> <out_get_data>bands</out_get_data> <source>gee</source> <collection>COPERNICUS/S2_SR</collection> <scale>10</scale> <qc>1</qc> <algorithm>snap</algorithm> <out_process_data>LAI</out_process_data> </remotedata> Store the contents of pecan.xml in a variable named settings and pass it to remote_process. PEcAn.data.remote::remote_process(settings) The output netCDF files(bands and LAI) will be saved at outdir and their records would be kept in the inputs table of BETYdb. 18.2.1.3 Example use (AppEEARS) This example will download the layers of a SMAP product(SPL3SMP_E.003) Add remotedata tag to pecan.xml and configure it. <remotedata> <out_get_data>smap</out_get_data> <source>appeears</source> <collection>SPL3SMP_E.003</collection> <projection>native</projection> <algorithm></algorithm> <credfile>path/to/jsonfile/containingcredentials</credfile> </remotedata> Store the contents of pecan.xml in a variable named settings and pass it to remote_process. PEcAn.data.remote::remote_process(settings) The output netCDF file will be saved at outdir and its record would be kept in the inputs table of BETYdb. 18.2.1.4 Example use GEDI (LP DAAC data pool) <remotedata> <out_get_data>gedi</out_get_data> <source>lpdaacdatapool</source> <collection>GEDI02_B.002</collection> <credfile>path/to/jsonfile/containingcredentials</credfile> </remotedata> Store the contents of pecan.xml in a variable named settings and pass it to remote_process. PEcAn.data.remote::remote_process(settings) 18.2.1.5 Adding new GEE image collections Once you have the Python script for downloading the collection from GEE, please do the following to integrate it with this module. Make sure that the function and script names are same and named in the following way: gee2pecan_pecancodeofimagecollection pecancodeofimagecollection can be any name which you want to use for representing the collection is an easier way. Additionaly, ensure that the function accepts and uses the following arguments, geofile - (str) GeoJSON file containing AOI information of the site outdir - (str) path where the output file has to be saved start - (str) start date in the form YYYY-MM-DD end - (str) end date in the form YYYY-MM-DD scale and qc if applicable. Make sure the output file is of netCDF type and follows the naming convention described above. Store the Python script at pecan/modules/data.remote/inst/RpTools/RpTools Update the register.GEE.xml file. After performing these steps the script will be integrated with the remote data module and would be ready to use. "],["shiny.html", "19 Shiny ", " 19 Shiny "],["testing-the-shiny-server.html", "19.1 Testing the Shiny Server", " 19.1 Testing the Shiny Server Shiny can be difficult to debug because, when run as a web service, the R output is hidden in system log files that are hard to find and read. One useful approach to debugging is to use port forwarding, as follows. First, on the remote machine (including the VM), make sure R’s working directory is set to the directory of the Shiny app (e.g., setwd(/path/to/pecan/shiny/WorkflowPlots), or just open the app as an RStudio project). Then, in the R console, run the app as: shiny::runApp(port = XXXX) # E.g. shiny::runApp(port = 5638) Then, on your local machine, open a terminal and run the following command, matching XXXX to the port above and YYYY to any unused port on your local machine (any 4-digit number should work). ssh -L YYYY:localhost:XXXX <remote connection> # E.g., for the PEcAn VM, given the above port: # ssh -L 5639:localhost:5638 carya@localhost -p 6422 Now, in a web browser on your local machine, browse to localhost:YYYY (e.g., localhost:5639) to run whatever app you started with shiny::runApp in the previous step. All of the output should display in the R console where the shiny::runApp command was executed. Note that this includes any print, message, logger.*, etc. statements in your Shiny app. If the Shiny app hits an R error, the backtrace should include a line like Hit error at of server.R#LXX – that XX being a line number that you can use to track down the error. To return from the error to a normal R prompt, hit <Control>-C (alternatively, the “Stop” button in RStudio). To restart the app, run shiny::runApp(port = XXXX) again (keeping the same port). Note that Shiny runs any code in the pecan/shiny/<app> directory at the moment the app is launched. So, any changes you make to the code in server.R and ui.R or scripts loaded therein will take effect the next time the app is started. If for whatever reason this doesn’t work with RStudio, you can always run R from the command line. Also, note that the ability to forward ports (ssh -L) may depend on the ssh configuration of your remote machine. These instructions have been tested on the PEcAn VM (v.1.5.2+). "],["debugging-shiny-apps.html", "19.2 Debugging Shiny Apps", " 19.2 Debugging Shiny Apps When developing shiny apps you can run the application from rstudio and place breakpoints int he code. To do this you will need to do the following steps first (already done on the VM) before starting rstudio: - echo “options(shiny.port = 6438)” >> ${HOME}/.Rprofile - echo “options(shiny.launch.browser = ‘FALSE’)” >> ${HOME}/.Rprofile Next you will need to create a tunnel for port 6438 to the VM, which will be used to open the shiny app, the following command will creat this tunnel: ssh -l carya -p 6422 -L 6438:localhost:6438 localhost. Now you can from rstudio run your application using shiny::runApp() and it will show the output from the application in your console. You can now place breakpoints and evaluate the output. "],["checking-log-files.html", "19.3 Checking Log Files", " 19.3 Checking Log Files To create Log files on the VM, execute the following: sudo -s echo "preserve_logs true;" >> /etc/shiny-server/shiny-server.conf service shiny-server restart Then within the directory /var/log/shiny-server you will see log files for your specific shiny apps. "],["adding-to-pecan.html", "20 Adding to PEcAn", " 20 Adding to PEcAn Case studies Adding a model Adding input data Adding data through the web interface Adding new species, PFTs, and traits from a new site Add a site Add some species Add PFT Add trait data Adding a benchmark Adding a met driver Reference (How to edit records in bety) Models Species PFTs Traits Inputs DB files Variables Formats (Link each section to relevant Bety tables) "],["adding-model.html", "20.1 Adding An Ecosystem Model", " 20.1 Adding An Ecosystem Model Adding a model to PEcAn involves two activities: Updating the PEcAn database to register the model Writing the interface modules between the model and PEcAn Note that coupling a model to PEcAn should not require any changes to the model code itself. A key aspect of our design philosophy is that we want it to be easy to add models to the system and we want to using the working version of the code that is used by all other model users, not a special branch (which would rapidly end up out-of-date). 20.1.1 Using PEcAn Database To run a model within PEcAn requires that the PEcAn database has sufficient information about the model. This includes a MODEL_TYPE designation, the types of inputs the model requires, the location of the model executable, and the plant functional types used by the model. The instructions in this section assume that you will be specifying this information using the BETYdb web-based interface. This can be done either on your local VM (localhost:3280/bety or localhost:6480/bety) or on a server installation of BETYdb. However you interact with BETYdb, we encourage you to set up your PEcAn instance to support database syncs so that these changes can be shared and backed-up across the PEcAn network. The figure below summarizes the relevant database tables that need to be updated to add a new model and the primary variables that define each table. 20.1.2 Define MODEL_TYPE The first step to adding a model is to create a new MODEL_TYPE, which defines the abstract model class. This MODEL_TYPE is used to specify input requirements, define plant functional types, and keep track of different model versions. The MODEL_TYPE is created by selecting Runs > Model Type and then clicking on New Model Type. The MODEL_TYPE name should be identical to the MODEL package name (see Interface Module below) and is case sensitive. 20.1.3 MACHINE The PEcAn design acknowledges that the same model executables and input files may exist on multiple computers. Therefore, we need to define the machine that that we are using. If you are running on the VM then the local machine is already defined as pecan. Otherwise, you will need to select Runs > Machines, click New Machine, and enter the URL of your server (e.g. pecan2.bu.edu). 20.1.4 MODEL Next we are going to tell PEcAn where the model executable is. Select Runs > Files, and click ADD. Use the pull down menu to specify the machine you just defined above and fill in the path and name for the executable. For example, if SIPNET is installed at /usr/local/bin/sipnet then the path is /usr/local/bin/ and the file (executable) is sipnet. Now we will create the model record and associate this with the File we just registered. The first time you do this select Runs > Models and click New Model. Specify a descriptive name of the model (which doesn’t have to be the same as MODEL_TYPE), select the MODEL_TYPE from the pull down, and provide a revision identifier for the model (e.g. v3.2.1). Once the record is created select it from the Models table and click EDIT RECORD. Click on “View Related Files” and when the search window appears search for the model executable you just added (if you are unsure which file to choose you can go back to the Files menu and look up the unique ID number). You can then associate this Model record with the File by clicking on the +/- symbol. By contrast, clicking on the name itself will take you to the File record. In the future, if you set up the SAME MODEL VERSION on a different computer you can add that Machine and File to PEcAn and then associate this new File with this same Model record. A single version of a model should only be entered into PEcAn once. If a new version of the model is developed that is derived from the current version you should add this as a new Model record but with the same MODEL_TYPE as the original. Furthermore, you should set the previous version of the model as Parent of this new version. 20.1.5 FORMATS The PEcAn database keep track of all the input files passed to models, as well as any data used in model validation or data assimilation. Before we start to register these files with PEcAn we need to define the format these files will be in. To create a new format see Formats Documentation. 20.1.6 MODEL_TYPE -> Formats For each of the input formats you specify for your model, you will need to edit your MODEL_TYPE record to add an association between the format and the MODEL_TYPE. Go to Runs > Model Type, select your record and click on the Edit button. Next, click on “Edit Associated Formats” and choose the Format you just defined from the pull down menu. If the Input box is checked then all matching Input records will be displayed in the PEcAn site run selection page when you are defining a model run. In other words, the set of model inputs available through the PEcAn web interface is model-specific and dynamically generated from the associations between MODEL_TYPEs and Formats. If you also check the Required box, then the Input will be treated as required and PEcAn will not run the model if that input is not available. Furthermore, on the site selection webpage, PEcAn will filter the available sites and only display pins on the Google Map for sites that have a full set of required inputs (or where those inputs could be generated using PEcAn’s workflows). Similarly, to make a site appear on the Google Map, all you need to do is specify Inputs, as described in the next section, and the point should automatically appear on the map. 20.1.7 INPUTS After a file Format has been created then input files can be registered with the database. Creating Inputs can be found under How to insert new Input data. 20.1.8 Add Plant Functional Types (PFTs) Since many of the PEcAn tools are designed to keep track of parameter uncertainties and assimilate data into models, to use PEcAn with a model it is important to define Plant Functional Types for the sites or regions that you will be running the model. Create a new PFT entry by selecting Data > PFTs and then clicking on New PFT. Give the PFT a descriptive name (e.g., temperate deciduous). PFTs are MODEL_TYPE specific, so choose your MODEL_TYPE from the pull down menu. 20.1.8.1 Species Within PEcAn there are no predefined PFTs and user can create new PFTs very easily at whatever taxonomic level is most appropriate, from PFTs for individual species up to one PFT for all plants globally. To allow PEcAn to query its trait database for information about a PFT, you will want to associate species with the PFT record by choosing Edit and then “View Related Species”. Species can be searched for by common or scientific name and then added to a PFT using the +/- button. 20.1.8.2 Cultivars You can also define PFTs whose members are cultivars instead of species. This is designed for analyses where you want to want to perform meta-analysis on within-species comparisons (e.g. cultivar evaluation in an agricultural model) but may be useful for other cases when you want to specify different priors for some member of a species. You cannot associate both species and cultivars with the same PFT, but the cultivars in a cultivar PFT may come from different species, potentially including all known cultivars from some of the species, if you wish to and have thought about how to interpret the results. It is not yet possible to add a cultivar PFT through the BETYdb web interface. See this GithHub comment for an example of how to define one manually in PostgreSQL. 20.1.9 Adding Priors for Each Variable In addition to adding species, a PFT is defined in PEcAn by the list of variables associated with the PFT. PEcAn takes a fundamentally Bayesian approach to representing model parameters, so variables are not entered as fixed constants but as prior probability distributions. There are a wide variety of priors already defined in the PEcAn database that often range from very diffuse and generic to very informative priors for specific PFTs. These pre-existing prior distributions can be added to a PFT. Navigate to the PFT from Data > PFTs and selecting the edit button in the Actions column for the chosen PFT. Click on “View Related Priors” button and search through the list for desired prior distributions. The list can be filtered by adding terms into the search box. Add a prior to the PFT by clicking on the far left button for the desired prior, changing it to an X. Save this by scrolling to the bottom of the PFT page and hitting the Update button. 20.1.9.1 Creating new prior distributions A new prior distribution can be created for a pre-existing variable, if a more constrained or specific one is known. Select Data > Priors then “New Prior” In the Citation box, type in or select an existing reference that indicates how the prior was defined. There are a number of unpublished citations in current use that simply state the expert opinion of an individual Fill the Variable box by typing in part or all of a pre-existing variable’s name and selecting it The Phylogeny box allows one to specify what taxonomic grouping the prior is defined for, at it is important to note that this is just for reference and doesn’t have to be specified in any standard way nor does it have to be monophyletic (i.e. it can be a functional grouping) The prior distribution is defined by choosing an option from the drop-down Distribution box, and then specifying values for both Parameter a and Parameter b. The exact meaning of the two parameters depends on the distribution chosen. For example, for the Normal distribution a and b are the mean and standard deviation while for the Uniform they are the minimum and maximum. All parameters are defined based on their standard parameterization in the R language Specify the prior sample size in N if the prior is based on observed data (independent of data in the PEcAn database) When this is done, scroll down and hit the Create button The new prior distribution can then be added a PFT as described in the “Adding Priors for Each Variable” section. 20.1.9.2 Creating new variables It is important to note that the priors are defined for the variable name and units as specified in the Variables table. If the variable name or units is different within the model it is the responsibility of write.configs.MODEL function to handle name and unit conversions (see Interface Modules below). This can also include common but nonlinear transformations, such as converting SLA to LMA or changing the reference temperature for respiration rates. To add a new variable, select Data > Variables and click the New Variable button. Fill in the Name field with the desired name for the variable and the units in the Units field. There are additional fields, such as Standard Units, Notes, and Description, that can be filled out if desired. When done, hit the Create button. The new variable can be used to create a prior distribution for it as in the “Creating new prior distributions” section. 20.1.10 Interface Modules 20.1.10.1 Setting up the module directory (required) PEcAn assumes that the interface modules are available as an R package in the models directory named after the model in question. The simplest way to get started on that R package is to make a copy the template directory in the pecan/models folder and re-name it to the name of your model. In the code, filenames, and examples below you will want to substitute the word MODEL for the name of your model (note: R is case-sensitive). If you do not want to write the interface modules in R then it is fairly simple to set up the R functions describe below to just call the script you want to run using R’s system command. Scripts that are not R functions should be placed in the inst folder and R can look up the location of these files using the function system.file which takes as arguments the local path of the file within the package folder and the name of the package (typically PEcAn.MODEL). For example "],["example-met-conversion-wrapper-function.html", "20.2 Example met conversion wrapper function", " 20.2 Example met conversion wrapper function met2model.MODEL <- function(in.path, in.prefix, outfolder, start_date, end_date){ myMetScript <- system.file(“inst/met2model.MODEL.sh”, “PEcAn.MODEL”) system(paste(myMetScript, file.path(in.path, in.prefix), outfolder, start_date, end_date)) } would execute the following at the Linux command line inst/met2model.MODEL.sh in.path/in.prefix outfolder start_date end_date ` 20.2.0.1 DESCRIPTION Within the module folder open the DESCRIPTION file and change the package name to PEcAn.MODEL. Fill out other fields such as Title, Author, Maintainer, and Date. 20.2.0.2 NAMESPACE Open the NAMESPACE file and change all instances of MODEL to the name of your model. If you are not going to implement one of the optional modules (described below) at this time then you will want to comment those out using the pound sign #. For a complete description of R NAMESPACE files see here. If you create additional functions in your R package that you want to be used make sure you include them in the NAMESPACE as well (internal functions don’t need to be declared) 20.2.0.3 Building the package Once the package is defined you will then need to add it to the PEcAn build scripts. From the root of the pecan directory, go into the scripts folder and open the file build.sh. Within the section of code that includes PACKAGES= add model/MODEL to the list of packages to compile. If, in writing your module, you add any other R packages to the system you will want to make sure those are listed in the DESCRIPTION and in the script scripts/install.dependencies.R. Next, from the root pecan directory open all/DESCRIPTION and add your model package to the Suggests: list. At any point, if you want to check if PEcAn can build your MODEL package successfully, just go to the linux command prompt and run scripts/build.sh. You will need to do this before the system can use these packages. 20.2.0.4 write.config.MODEL (required) This module performs two primary tasks. The first is to take the list of parameter values and model input files that it receives as inputs and write those out in whatever format(s) the MODEL reads (e.g. a settings file). The second is to write out a shell script, jobs.sh, which, when run, will start your model run and convert its output to the PEcAn standard (netCDF with metadata currently equivalent to the MsTMIP standard). Within the MODEL directory take a close look at inst/template.job and the example write.config.MODEL to see an example of how this is done. It is important that this script writes or moves outputs to the correct location so that PEcAn can find them. The example function also shows an example of writing a model-specific settings/config file, also by using a template. You are encouraged to read the section above on defining PFTs before writing write.config.MODEL so that you understand what model parameters PEcAn will be passing you, how they will be named, and what units they will be in. Also note that the (optional) PEcAn input/driver processing scripts are called by separate workflows, so the paths to any required inputs (e.g. meteorology) will already be in the model-specific format by the time write.config.MODEL receives that info. 20.2.0.5 Output Conversions The module model2netcdf.MODEL converts model output into the PEcAn standard (netCDF with metadata currently equivalent to the MsTMIP standard). This function was previously required, but now that the conversion is called within jobs.sh it may be easier for you to convert outputs using other approaches (or to just directly write outputs in the standard). Whether you implement this function or convert outputs some other way, please note that PEcAn expects all outputs to be broken up into ANNUAL files with the year number as the file name (i.e. YEAR.nc), though these files may contain any number of scalars, vectors, matrices, or arrays of model outputs, such as time-series of each output variable at the model’s native timestep. Note: PEcAn reads all variable names from the files themselves so it is possible to add additional variables that are not part of the MsTMIP standard. Similarly, there are no REQUIRED output variables, though time is highly encouraged. We are shortly going establish a canonical list of PEcAn variables so that if users add additional output variables they become part of the standard. We don’t want two different models to call the same output with two different names or different units as this would prohibit the multi-model syntheses and comparisons that PEcAn is designed to facilitate. 20.2.0.6 met2model.MODEL met2model.MODEL(in.path, in.prefix, outfolder, start_date, end_date) Converts meteorology input files from the PEcAn standard (netCDF, CF metadata) to the format required by the model. This file is optional if you want to load all of your met files into the Inputs table as described in How to insert new Input data, which is often the easiest way to get up and running quickly. However, this function is required if you want to benefit from PEcAn’s meteorology workflows and model run cloning. You’ll want to take a close look at [Adding-an-Input-Converter] to see the exact variable names and units that PEcAn will be providing. Also note that PEcAn splits all meteorology up into ANNUAL files, with the year number explicitly included in the file name, and thus what PEcAn will actually be providing is in.path, the input path to the folder where multiple met files may stored, and in.prefix, the start of the filename that precedes the year (i.e. an individual file will be named <in.prefix>.YEAR.nc). It is valid for in.prefix to be blank. The additional REQUIRED arguments to met2model.MODEL are outfolder, the output folder where PEcAn wants you to write your meteorology, and start_date and end_date, the time range the user has asked the meteorology to be processed for. 20.2.0.7 Commit changes Once the MODEL modules are written, you should follow the Using-Git instructions on how to commit your changes to your local git repository, verify that PEcAn compiles using scripts/build.sh, push these changes to Github, and submit a pull request so that your model module is added to the PEcAn system. It is important to note that while we encourage users to make their models open, adding the PEcAn interface module to the Github repository in no way requires that the model code itself be made public. It does, however, allow anyone who already has a copy of the model code to use PEcAn so we strongly encourage that any new model modules be committed to Github. "],["NewInput.html", "20.3 Adding input data", " 20.3 Adding input data 20.3.1 Input records in BETY All model input data or data used for model calibration/validation must be registered in the BETY database. Before creating a new Input record, you must make sure that the format type of your data is registered in the database. If you need to make a new format record, see Creating a new format record in BETY. 20.3.2 Create a database file record for the input data An input record contains all the metadata required to identify the data, however, this record does not include the location of the data file. Since the same data may be stored in multiple places, every file has its own dbfile record. From your BETY interface: Create a DBFILES entry for the path to the file From the menu click RUNS then FILES Click “New File” Select the machine your file is located at Fill in the File Path where your file is located (aka folder or directory) NOT including the name of the file itself Fill in the File Name with the name of the file itself. Note that some types of input records will refer to be ALL the files in a directory and thus File Name can be blank Click Update 20.3.3 Creating a new Input record in BETY From your BETY interface: Create an INPUT entry for your data From the menu click RUNS then INPUTS Click “New Input” Select the SITE that this data is associated with the input data set Other required fields are a unique name for the input, the start and end dates of the data set, and the format of the data. If the data is not in a currently known format you will need to create a NEW FORMAT and possibly a new input converter. Instructions on how to do add a converter can be found here Input conversion. Instructions on how to add a format record can be found here Parent ID is an optional variable to indicated that one dataset was derived from another. Click “Create” Associate the DBFILE with the INPUT In the RUNS -> INPUTS table, search and find the input record you just created Click on the EDIT icon Select “View related Files” In the Search window, search for the DBFILE you just created Once you have found the DBFILE, click on the “+” icon to add the file Click on “Update” at the bottom when you are done. 20.3.4 Adding a new input converter Three Types of data conversions are discussed below: Meteorological data, Vegetation data, and Soil data. Each section provides instructions on how to convert data from their raw formats into a PEcAn standard format, whether it be from a database or if you have raw data in hand. Also, see PEcAn standard formats. 20.3.4.1 Meterological Data 20.3.4.1.1 Adding a function to PEcAn to convert a met data source In general, you will need to write a function to download the raw met data and one to convert it to the PEcAn standard. Downloading raw data function are named download.<source>.R. These functions are stored within the PEcAn directory: /modules/data.atmosphere/R. Conversion function from raw to standard are named met2CF.<source>.R. These functions are stored within the PEcAn directory: /modules/data.atmosphere/R. Current Meteorological products that are coupled to PEcAn can be found in our Available Meteorological Drivers page. Note: Unless you are also adding a new model, you will not need to write a script to convert from PEcAn standard to PEcAn models. Those conversion scripts are written when a model is added and can be found within each model’s PEcAn directory. Standards dimesion, names, nad units can be found here: Input Standards 20.3.4.1.2 Adding Single-Site Specific Meteorological Data Perhaps you have meteorological data specific to one site, with a unique format that you would like to add to PEcAn. Your steps would be to: 1. write a script or function to convert your files into the netcdf PEcAn standard 2. insert that file as an input record for your site following these instructions 20.3.4.1.3 Processing Met data outside of the workflow using PEcAn functions Perhaps you would like to obtain data from one of the sources coupled to PEcAn on its own. To do so you can run PEcAn functions on their own. 20.3.4.1.3.1 Example 1: Processing data from a database Download Amerifluxlbl from Niwot Ridge for the year 2004: raw.file <-PEcAn.data.atmosphere::download.AmerifluxLBL(sitename = "US-NR1", outfolder = ".", start_date = "2004-01-01", end_date = "2004-12-31") Using the information returned as the object raw.file you will then convert the raw files into a standard file. Open a connection with BETY. You may need to change the host name depending on what machine you are hosting BETY. You can find the hostname listed in the machines table of BETY. con <- PEcAn.DB::db.open( params = list( driver = RPostgres::Postgres(), dbname = 'bety', host ='localhost', user = "bety", password = "bety") ) Next you will set up the arguments for the function in.path <- '.' in.prefix <- raw.file$dbfile.name outfolder <- '.' format.id <- 5000000002 format <- PEcAn.DB::query.format.vars(format.id=format.id,bety = con) lon <- -105.54 lat <- 40.03 format$time_zone <- "America/Chicago" Note: The format.id can be pulled from the BETY database if you know the format of the raw data. Once these arguments are defined you can execute the met2CF.csv function PEcAn.data.atmosphere::met2CF.csv(in.path = in.path, in.prefix =in.prefix, outfolder = ".", start_date ="2004-01-01", end_date = "2004-12-01", lat= lat, lon = lon, format = format) 20.3.4.1.3.2 Example 2: Processing data from data already in hand If you have Met data already in hand and you would like to convert into the PEcAn standard follow these instructions. Update BETY with file record, format record and input record according to this page How to Insert new Input Data If your data is in a csv format you can use the met2CF.csvfunction to convert your data into a PEcAn standard file. Open a connection with BETY. You may need to change the host name depending on what machine you are hosting BETY. You can find the hostname listed in the machines table of BETY. con <- PEcAn.DB::db.open( params = list( driver = RPostgres::Postgres(), dbname = 'bety', host ='localhost', user = "bety", password = "bety") ) Prepare the arguments you need to execute the met2CF.csv function in.path <- 'path/where/the/raw/file/lives' in.prefix <- 'prefix_of_the_raw_file' outfolder <- 'path/to/where/you/want/to/output/thecsv/' format.id <- formatid of the format your created format <- PEcAn.DB::query.format.vars(format.id=format.id, bety = con) lon <- longitude of your site lat <- latitude of your site format$time_zone <- time zone of your site start_date <- Start date of your data in "y-m-d" end_date <- End date of your data in "y-m-d" Next you can execute the function: PEcAn.data.atmosphere::met2CF.csv(in.path = in.path, in.prefix =in.prefix, outfolder = ".", start_date = start_date, end_date = end_date, lat= lat, lon = lon, format = format) 20.3.4.2 Vegetation Data Vegetation data will be required to parameterize your model. In these examples we will go over how to produce a standard initial condition file. The main function to process cohort data is the ic_process.R function. As of now however, if you require pool data you will run a separate function, pool_ic_list2netcdf.R. 20.3.4.2.0.1 Example 1: Processing Veg data from data in hand. In the following example we will process vegetation data that you have in hand using PEcAn. First, you’ll need to create a input record in BETY that will have a file record and format record reflecting the location and format of your file. Instructions can be found in our How to Insert new Input Data page. Once you have created an input record you must take note of the input id of your record. An easy way to take note of this is in the URL of the BETY webpage that shows your input record. In this example we use an input record with the id 1000013064 which can be found at this url: https://psql-pecan.bu.edu/bety/inputs/1000013064# . Note that this is the Boston University BETY database. If you are on a different machine, your url will be different. With the input id in hand you can now edit a pecan XML so that the PEcAn function ic_process will know where to look in order to process your data. The inputs section of your pecan XML will look like this. As of now ic_process is set up to work with the ED2 model so we will use ED2 settings and then grab the intermediary Rds data file that is created as the standard PEcAn file. For your Inputs section you will need to input your input id wherever you see the useic flag. <inputs> <css> <source>FFT</source> <output>css</output> <username>pecan</username> <id>1000013064</id> <useic>TRUE</useic> <metadata> <trk>1</trk> <age>70</age> <area>400</area> </metadata> </css> <pss> <source>FFT</source> <output>pss</output> <username>pecan</username> <id>1000013064</id> <useic>TRUE</useic> </pss> <site> <source>FFT</source> <output>site</output> <username>pecan</username> <id>1000013064</id> <useic>TRUE</useic> </site> <met> <source>CRUNCEP</source> <output>ED2</output> </met> <lu> <id>294</id> </lu> <soil> <id>297</id> </soil> <thsum> <id>295</id> </thsum> <veg> <id>296</id> </veg> </inputs> This IC workflow also supports generating ensembles of initial conditions from posterior estimates of DBH. To do this the tags below can be inserted to the pecan.xml: <css> <source>PalEON</source> <output>css</output> <id>1000015682</id> <useic>TRUE</useic> <ensemble>20</ensemble> <metadata> <area>1256.637</area> <n.patch>3</n.patch> </metadata> </css> Here the id should point to a file that has MCMC samples to generate the ensemble from. The number between the <ensemble> tag defines the number of ensembles requested. The workflow will populate the settings list run$inputs tag with ensemble member information. E.g.: <inputs> <css> <path1>...</path1> <path2>...</path2> <path3>...</path3> ... <pathN>...</pathN> </css> <pss> <path> <path1>...</path1> <path2>...</path2> <path3>...</path3> ... <pathN>...</pathN> </path> </pss> <site> <path> <path1>...</path1> <path2>...</path2> <path3>...</path3> ... <pathN>...</pathN> </path> </site> <met>...</met> <lu>...</lu> <soil>...</soil> <thsum>...</thsum> <veg>...</veg> </inputs> Once you edit your PEcAn.xml you can than create a settings object using PEcAn functions. Your pecan.xml must be in your working directory. settings <- PEcAn.settings::read.settings("pecan.xml") settings <- PEcAn.settings::prepare.settings(settings, force=FALSE) You can then execute the ic_process function to convert data into a standard Rds file: input <- settings$run$inputs dir <- "." ic_process(settings, input, dir, overwrite = FALSE) Note that the argument dir is set to the current directory. You will find the final ED2 file there. More importantly though you will find the .Rds file within the same directory. 20.3.4.2.0.2 Example 3 Pool Initial Condition files If you have pool vegetation data, you’ll need the pool_ic_list2netcdf.R function to convert the pool data into PEcAn standard. The function stands alone and requires that you provide a named list of netcdf dimensions and values, and a named list of variables and values. Names and units need to match the standard_vars.csv table found here. #Create a list object with necessary dimensions for your site input<-list() dims<- list(lat=-115,lon=45, time= 1) variables<- list(SoilResp=8,TotLivBiom=295) input$dims <- dims input$vals <- variables Once this is done, set outdir to where you’d like the file to write out to and a siteid. Siteid in this can be used as an file name identifier. Once part of the automated workflow siteid will reflect the site id within the BET db. outdir <- "." siteid <- 772 pool_ic_list2netcdf(input = input, outdir = outdir, siteid = siteid) You should now have a netcdf file with initial conditions. 20.3.4.3 Soil Data 20.3.4.3.0.1 Example 1: Converting Data in hand Local data that has the correct names and units can easily be written out in PEcAn standard using the function soil2netcdf. soil.data <- list(volume_fraction_of_sand_in_soil = c(0.3,0.4,0.5), volume_fraction_of_clay_in_soil = c(0.3,0.3,0.3), soil_depth = c(0.2,0.5,1.0)) soil2netcdf(soil.data,"soil.nc") At the moment this file would need to be inserted into Inputs manually. By default, this function also calls soil_params, which will estimate a number of hydraulic and thermal parameters from texture. Be aware that at the moment not all model couplers are yet set up to read this file and/or convert it to model-specific formats. 20.3.4.3.0.2 Example 2: Converting PalEON data In addition to location-specific soil data, PEcAn can extract soil texture information from the PalEON regional soil product, which itself is a subset of the MsTMIP Unified North American Soil Map. If this product is installed on your machine, the appropriate step in the do_conversions workflow is enabled by adding the following tag under <inputs> in your pecan.xml <soil> <id>1000012896</id> </soil> In the future we aim to extend this extraction to a wider range of soil products. 20.3.4.3.0.3 Example 3: Extracting soil properties from gSSURGO database In addition to location-specific soil data, PEcAn can extract soil texture information from the gSSURGO data product. This product needs no installation and it extract soil proeprties for the lower 48 states in U.S. In order to let the pecan know that you’re planning to use gSSURGO, you can the following XML tag under input in your pecan xml file. <inputs> <soil> <source>gSSURGO</source> </soil> </inputs> "],["adding-data-web.html", "20.4 Pecan Data Ingest via Web Interface", " 20.4 Pecan Data Ingest via Web Interface This tutorial explains the process of ingesting data into PEcAn via our Data-Ingest Application. In order to ingest data, the users must first select data that they wish to upload. Then, they enter metadata to help PEcAn parse and load the data into the main PEcAn workflow. 20.4.1 Loading Data 20.4.1.1 Selecting Ingest Method The Data-Ingest application is capable of loading data from the DataONE data federation and from the user’s local machine. The first step in the workflow is therefore to select an upload method. The application defaults to uploading from DataONE. To upload data from a local device, simply select the radio button titled Local Files. 20.4.1.2 DataONE Upload Example The DataONE download feature allows the user to download data at a given doi or DataONE specific package id. To do so, enter the doi or identifier in the Import From DataONE field and select download. The download process may take a couple of minutes to run depending on the number of files in the dataONE package. This may be a convenient option if the user does not wish to download files directly to their local machine. Once the files have been successfully downloaded from DataONE, they are displayed in a table. Before proceeding to the next step, the user can select a file to ingest by clicking on the corresponding row in the data table. 20.4.2 Local Upload Example To upload local files, the user should first select the Local Files button. From there, the user can upload files from their local machines by selecting Browse or by dragging and dropping files into the text box. The files will begin uploading automatically. From there, the user should select a file to ingest and then select the Next Step button. After this step, the workflow is identical for both methods. However, please note that if it becomes necessary to switch from loading data via DataONE to uploading local files after the first step, please restart the application. ### 2. Creating an Input Record Creating an input record requires some basic metadata about the file that is being ingested. Each entry field is briefly explained below. Site: To link the selected file with a site, the user can scroll or type to search all the sites in PEcAn. See Example: Parent: To link the selected file with another dataset, type to search existing datasets in the Parent field. Name: this field should be autofilled by selecting a file in step 1. Format: If the selected file has an existing format name, the user can search and select in the Format field. If the selected file’s format is not already in pecan, the user can create a new format by selecting Create New Format. Once this new format is created, it will automatically populate the Format box and the Current Mimetype box (See Section 3). Mimetype: If the format already exists, select an existing mimetype. Start and End Date and Time: Inputs can be entered manually or by using the user interface. See example Notes: Describe the data that is being uploaded. Please include any citations or references. 20.4.3 3. Creating a format record If it is necessary to add a new format to PEcAn, the user should fill out the form attached to the Create New Format button. The inputs to this form are described below: Mimetype: type to search existing mimetypes. If the mimetype is not in that list, please click on the link Create New Mimetype and create a new mimetype via the BETY website. New Format Name: Add the name of the new format. Please exclude spaces from the name. Instead please use underscores “_“. Header: If there is space before the first line of data in the dataset, please select Yes Skip: The number of lines in the header that should be skipped before the data. Please enter notes that describe the format. Example: ### 4. Formats_Variables Record The final step in the ingest process is to register a formats-variables record. This record links pecan variables with variables from the selected data. Variable: PEcAn variable that is equivalent to variable in selected file. Name: The variable name in the imported data need only be specified if it differs from the BETY variable name. Unit: Should be in a format parseable by the udunits library and need only be secified if the units of the data in the file differ from the BETY standard. Storage Type: Storage type need only be specified if the variable is stored in a format other than would be expected (e.g. if numeric values are stored as quoted character strings). Additionally, storage_type stores POSIX codes that are used to store any time variables (e.g. a column with a 4-digit year would be %Y). Column Number: Vector of integers that list the column numbers associated with variables in a dataset. Required for text files that lack headers. Finally, the path to the ingest data is displayed in the Select Files box. "],["NewFormat.html", "20.5 Creating a new format", " 20.5 Creating a new format 20.5.1 Formats in BETY The PEcAn database keeps track of all the input files passed to models, as well as any data used in model validation or data assimilation. Before we start to register these files with PEcAn we need to define the format these files will be in. The main goal is to take all the meta-data we have about a data file and create a record of it that pecan can use as a guide when parsing the data file. This information is stored in a Format record in the bety database. Make sure to read through the current Formats before deciding to make a new one. 20.5.2 Creating a new format in BETY If the Format you are looking for is not available, you will need to create a new record. Before entering information into the database, you need to be able to answer the following questions about your data: What is the file MIME type? We have a suit of functions for loading in data in open formats such as CSV, txt, netCDF, etc. PEcAn has partnered with the NCSA BrownDog project to create a service that can read and convert as many data formats as possible. If your file type is less common or a proprietary type, you can use the BrownDog DAP to convert it to a format that can be used with PEcAn. If BrownDog cannot convert your data, you will need to contact us about writing a data specific load function. What variables does the file contain? What are the variables named? What are the variable units? How do the variable names and units in the data map to PEcAn variables in the BETY database? See below for an example. It is most likely that you will NOT need to add variables to BETY. However, identifying the appropriate variables matches in the database may require some work. We are always available to help answer your questions. Is there a timestamp on the data? What are the units of time? Here is an example using a fake dataset: example_data This data started out as an excel document, but was saved as a CSV file. To create a Formats record for this data, in the web interface of BETY, select Runs > Formats and click New Format. You will need to fill out the following fields: MIME type: File type (you can search for other formats in the text field) Name: The name of your format (this can be whatever you want) Header: Boolean that denotes whether or not your data contains a header as the first line of the data. (1 = TRUE, 0 = FALSE) Skip: The number of lines above the data that should be skipped. For example, metadata that should not be included when reading in the data or blank spaces. Notes: Any additional information about the data such as sources and citations. Here is the Formats record for the example data: format_record_1 When you have finished this section, hit Create. The final record will be displayed on the screen. 20.5.2.1 Formats -> Variables After a Format entry has been created, you are encouraged to edit the entry to add relationships between the file’s variables and the Variables table in PEcAn. Not only do these relationships provide meta-data describing the file format, but they also allow PEcAn to search and (for some MIME types) read files. To enter this data, select Edit Record and on the edit screen select View Related Variable. Here is the record for the example data after adding related variables: format_record_2 20.5.2.1.1 Name and Unit For each variable in the file you will want at a minimum to specify the NAME of the variable within your file and match that to the equivalent Variable in the pulldown. Make sure to search for your variables under Data > Variables before suggesting that we create a new variable record. This may not always be a straightforward process. For example bety contains a record for Net Primary Productivity: var_record This record does not have the same variable name or the same units as NPP in the example data. You may have to do some reading to confirm that they are the same variable. In this case - Both the data and the record are for Net Primary Productivity (the notes section provides additional resources for interpreting the variable.) - The units of the data can be converted to those of the vairiable record (this can be checked by running udunits2::ud.are.convertible(\"g C m-2 yr-1\", \"Mg C ha-1 yr-1\")) Differences between the data and the variable record can be accounted for in the data Formats record. Under Variable, select the variable as it is recorded in bety. Under Name, write the name the variable has in your data file. Under Unit, write the units the variable has in your data file. NOTE: All units must be written in a udunits compliant format. To check that your units can be read by udunits, in R, load the udunits2 package and run udunits2::is.parseable(\"g C m-2 yr-1\") If the name or the units are the same, you can leave the Name and Unit fields blank. This is can be seen with the variable LAI. 20.5.2.1.2 Storage Type Storage Type only needs to be specified if the variable is stored in a format other than what would be expected (e.g. if numeric values are stored as quoted character strings). One such example is time variables. PEcAn converts all dates into POSIX format using R functions such as strptime. These functions require that the user specify the format in which the date is written. The default is \"%Y-%m-%d %H:%M:%S\" which would look like \"2017-01-01 00:00:00\" A list of date formats can be found in the R documentation for the function strptime Below are some commonly used codes: %d Day of the month as decimal number (01–31). %D Date format such as %m/%d/%y. %H Hours as decimal number (00–23). %m Month as decimal number (01–12). %M Minute as decimal number (00–59). %S Second as integer (00–61), allowing for up to two leap-seconds (but POSIX-compliant implementations will ignore leap seconds). %T Equivalent to %H:%M:%S. %y Year without century (00–99). On input, values 00 to 68 are prefixed by 20 and 69 to 99 by 19 – that is the behaviour specified by the 2004 and 2008 POSIX standards, but they do also say ‘it is expected that in a future version the default century inferred from a 2-digit year will change’. %Y Year with century. 20.5.2.1.3 Column Number If your data is in text format with variables in a standard order then you can specify the Column Number for the variable. This is required for text files that lack headers. 20.5.2.2 Retrieving Format Information To acquire Format information from a Format record, use the R function query.format.vars 20.5.2.2.1 Inputs bety: connection to BETY input.id=NA and/or format.id=NA: Input or Format record ID from BETY At least one must be specified. Defaults to format.id if both provided. var.ids=NA: optional vector of variable IDs. If provided, limits results to these variables. 20.5.2.2.2 Output R list object containing many things. Fill this in. "],["NewBenchmark.html", "20.6 Creating a new benchmark reference run", " 20.6 Creating a new benchmark reference run The purpose of the reference run record in BETY is to store all the settings from a run that are necessary in exactly recreating it. The pecan.xml file is the home of absolutely all the settings for a particular run in pecan. However, much of the information in the pecan.xml file is server and user specific and more importantly, the pecan.xml files are stored on individual servers and may not be available to the public. When a run that is performed using pecan is registered as a reference run, the settings that were used to make that run are made available to all users through the database. All completed runs are not automatically registered as reference runs. To register a run, navigate to the benchmarking section of the workflow visualizations Shiny app. "],["editing-records.html", "20.7 Editing records", " 20.7 Editing records Models Species PFTs Traits Inputs DB files Variables Formats (Link each section to relevant Bety tables) "],["troubleshooting-and-debugging-pecan.html", "21 Troubleshooting and Debugging PEcAn ", " 21 Troubleshooting and Debugging PEcAn "],["cookies-and-pecan-web-pages.html", "21.1 Cookies and pecan web pages", " 21.1 Cookies and pecan web pages You may need to disable cookies specifically for the pecan webserver in your browser. This shouldn’t be a problem running from the virtual machine, but your installation of php can include a ‘PHPSESSID’ that is quite long, and this can overflow the params field of the workflows table, depending on how long your hostname, model name, site name, etc are. "],["warning-mkdir-function.mkdir-no-such-file-or-directory.html", "21.2 Warning: mkdir() [function.mkdir]: No such file or directory", " 21.2 Warning: mkdir() [function.mkdir]: No such file or directory If you are seeing: Warning: mkdir() [function.mkdir]: No such file or directory in /path/to/pecan/web/runpecan.php at line 169 it is because you have used a relative path for $output_folder in system.php. "],["after-creating-a-new-pft-the-tag-for-pft-not-passed-to-config.xml-in-ed.html", "21.3 After creating a new PFT the tag for PFT not passed to config.xml in ED", " 21.3 After creating a new PFT the tag for PFT not passed to config.xml in ED This is a result of the rather clunky way we currently have adding PFTs to PEcAn. This is happening because you need to edit the ./pecan/models/ed/data/pftmapping.csv file to include your new PFTs. This is what the file looks like: PEcAn;ED ebifarm.acru;11 ebifarm.acsa3;11 ... You just need to edit this file (in a text editor, no Excel) and add your PFT names and associated number to the end of the file. Once you do this, recompile PEcAn and it should then work for you. We currently need to reference this file in order to properly set the PFT number and maintain internal consistency between PEcAn and ED2. "],["debugging.html", "21.4 Debugging", " 21.4 Debugging How to identify the source of a problem. 21.4.1 Using tests/workflow.R This script, along with model-specific settings files in the tests folder, provide a working example. From inside the tests folder, R CMD --vanilla -- --settings pecan.<model>.xml < workflow.R should work. The next step is to add debugonce(<broken.function.name>) before running the test workflow. This allows you can step through the function and evaluate the different objects as they are created and/or transformed. See tests README for more information. "],["useful-scripts.html", "21.5 Useful scripts", " 21.5 Useful scripts The following scripts (in qaqc/vignettes identify, respectively: relationships among functions across packages function inputs and outputs (e.g. that will identify which functions and outputs are used in a workflow). "],["database.html", "22 BETY Database Administration", " 22 BETY Database Administration This section provides additional details about the BETY database used by PEcAn. It will discuss best practices for setting up the BETY database, how to backup the database and how to restore the database. "],["database-setup.html", "22.1 Best practices", " 22.1 Best practices When using the BETY database in non testing mode, it is best not to use the default users. This is accomplished when running the initialize of the database. When the database is initally created the database will be created with some default users (best known is the carya user) as well as the guestuser that can be used in the BETY web application. To disable these users you will either need to disable the users from the web interface, or you can reinitialize the database and remove the -u flag from the command line (the -u flag will create the default users). To disable the guestuser as well you can remove the -g flag from the command line, or disable the account from BETY. The default installation of BETY and PEcAn will assume there is a database called bety with a default username and password. The default installation will setup the database account to not have any superuser abilities. It is also best to limit access to the postgres database from trusted hosts, either by using firewalls, or configuring postgresql to only accept connections from a limited set of hosts. "],["backup-of-bety-database.html", "22.2 Backup of BETY database", " 22.2 Backup of BETY database It is good practice to make sure you backup the BETY database. Just creating a copy of the files on disk is not enough to ensure you have a valid backup. Most likely if you do this you will end up with a corrupted backup of the database. To backup the database you can use the pg_dump command, which will make sure the database id backed up in a consistent state. You can run sudo -u postgres pg_dump -d bety -Z 9 -f bety.sql.gz, this will create a compressed file that can be used to resotore the database. In the PEcAn distribution in scripts folder there is a script called backup.bety.sh. This script will create the backup of the database. It will create multiple backups allowing you to restore the database from one of these copies. The database will be backed up to one of the following files: - bety-d-X, daily backup, where X is the day of the month. - bety-w-X, weekly backup, where X is the week number in the year - bety-m-X, montly backup, where X is the month of the year - bety-y-X, yearly backup, where X is the actual year. Using this scheme, we can restore the database using any of the files generated. It is recommeneded to run this script using a cronjob at midnight such that you have a daily backup of the database and do not have to remember to create these backups. When running this script (either cron or by hand) make sure to place the backups on a different machine than the machine that holds the database in case of a larger system failure. "],["restore-of-bety-database.html", "22.3 Restore of BETY database", " 22.3 Restore of BETY database Hopefully this section will never need to be used. Following are 5 steps that have been used to restore the database. Before you start it is worth it to read up online a bit on restoring the database as well as join the slack channel and ask any of the people there for help. stop apache (BETY/PEcAn web apps) service httpd stop or service apache2 stop backup database (you know just incase) pg_dump -d bety > baddb.sql drop database sudo -u postgres psql -c 'drop database bety' create database sudo -u postgres psql -c 'create database bety with owner bety' load database (assuming dump is called bety.sql.gz) zcat bety.sql.gz | grep -v search_path | sudo -u postgres psql -d bety start apache again service httpd start or service apache2 start If during step 5 there is a lot of errors, it is helpful to add -v ON_ERROR_STOP=1 to the end of the command. This will stop the restore at the first error and will help with debugging the issue. "],["workflow-modules.html", "23 Workflow modules", " 23 Workflow modules NOTE: As of PEcAn 1.2.6 – needs to be updated significantly "],["overview-1.html", "23.1 Overview", " 23.1 Overview Workflow inputs and outputs (click to open in new page, then zoom). Code used to generate this image is provided in qaqc/vignettes/module_output.Rmd "],["load-settings.html", "23.2 Load Settings", " 23.2 Load Settings 23.2.1 read.settings(\"/home/pecan/pecan.xml\") loads settings create directories generates new xml, put in output folder "],["query-database.html", "23.3 Query Database", " 23.3 Query Database 23.3.1 get.trait.data() Queries the database for both the trait data and prior distributions associated with the PFTs specified in the settings file. The list of variables that are queried is determined by what variables have priors associated with them in the definition of the pft. Likewise, the list of species that are associated with a PFT determines what subset of data is extracted out of all data matching a given variable name. "],["meta-analysis.html", "23.4 Meta Analysis", " 23.4 Meta Analysis 23.4.1 run.meta.analysis() The meta-analysis code begins by distilling the trait.data to just the values needed for the meta-analysis statistical model, with this being stored in madata.Rdata. This reduced form includes the conversion of all error statistics into precision (1/variance), and the indexing of sites, treatments, and greenhouse. In reality, the core meta-analysis code can be run independent of the trait database as long as input data is correctly formatted into the form shown in madata. The evaluation of the meta-analysis is done using a Bayesian statistical software package called JAGS that is called by the R code. For each trait, the R code will generate a [trait].model.bug file that is the JAGS code for the meta-analysis itself. This code is generated on the fly, with PEcAn adding or subtracting the site, treatment, and greenhouse terms depending upon the presence of these effects in the data itself. Meta-analyses are run, and summary plots are produced. "],["write-configuration-files.html", "23.5 Write Configuration Files", " 23.5 Write Configuration Files 23.5.1 write.configs(model) writes out a configuration file for each model run ** writes 500 configuration files for a 500 member ensemble ** for n traits, writes 6 * n + 1 files for running default Sensitivity Analysis (number can be changed in the pecan settings file) "],["start-runs.html", "23.6 Start Runs", " 23.6 Start Runs 23.6.1 start.runs(model) This code starts the model runs using a model specific run function named start.runs.model. If the ecosystem model is running on a remote server, this module also takes care of all of the communication with the remote server and its run queue. Each of your subdirectories should now have a [run.id].out file in it. One instance of the model is run for each configuration file generated by the previous write configs module. "],["get-model-output.html", "23.7 Get Model Output", " 23.7 Get Model Output 23.7.1 get.model.output(model) This code first uses a model-specific model2netcdf.model function to convert the model output into a standard output format (MsTMIP). Then it extracts the data for requested variables specified in the settings file as settings$ensemble$variable, averages over the time-period specified as start.date and end.date, and stores the output in a file output.Rdata. The output.Rdata file contains two objects, sensitivity.output and ensemble.output, that is the model prediction for the parameter sets specified in sa.samples and ensemble.samples. In order to save bandwidth, if the model output is stored on a remote system PEcAn will perform these operations on the remote host and only return the output.Rdata object. "],["ensemble-analysis.html", "23.8 Ensemble Analysis", " 23.8 Ensemble Analysis 23.8.1 run.ensemble.analysis() This module makes some simple graphs of the ensemble output. Open ensemble.analysis.pdf to view the ensemble prediction as both a histogram and a boxplot. ensemble.ts.pdf provides a timeseries plot of the ensemble mean, meadian, and 95% CI "],["sensitivity-analysis-variance-decomposition.html", "23.9 Sensitivity Analysis, Variance Decomposition", " 23.9 Sensitivity Analysis, Variance Decomposition 23.9.1 run.sensitivity.analysis() This function processes the output of the previous module into sensitivity analysis plots, sensitivityanalysis.pdf, and a variance decomposition plot, variancedecomposition.pdf . In the sensitivity plots you will see the parameter values on the x-axis, the model output on the Y, with the dots being the model evaluations and the line being the spline fit. The variance decomposition plot is discussed more below. For your reference, the R list object, sensitivity.results, stored in sensitivity.results.Rdata, contains all the components of the variance decomposition table, as well as the the input parameter space and splines from the sensitivity analysis (reminder: the output parameter space from the sensitivity analysis was in outputs.R). The variance decomposition plot contains three columns, the coefficient of variation (normalized posterior variance), the elasticity (normalized sensitivity), and the partial standard deviation of each model parameter. This graph is sorted by the variable explaining the largest amount of variability in the model output (right hand column). From this graph identify the top-tier parameters that you would target for future constraint. "],["glossary.html", "23.10 Glossary", " 23.10 Glossary Inputs: data sets that are used, and file paths leading to them Parameters: e.g. info set in settings file Outputs: data sets that are dropped, and the file paths leading to them "],["installation-details.html", "24 Installation details", " 24 Installation details This chapter contains details about installing and maintaining the uncontainerized version of PEcAn on a virtual machine or a server. If you are running PEcAn inside of Docker, many of the particulars will be different and you should refer to the docker chapter instead of this one. "],["pecanvm.html", "24.1 PEcAn Virtual Machine", " 24.1 PEcAn Virtual Machine See also other VM related documentation sections: Maintaining your PEcAn VM Connecting to the VM via SSH Connecting to bety on the VM via SSh Using Amazon Web Services for a VM (AWS) Creating a Virtual Machine VM Desktop Conversion Install RStudio Desktop The PEcAn virtual machine consists of all of PEcAn pre-compiled within a Linux operating system and saved in a “virtual machine” (VM). Virtual machines allow for running consistent set-ups without worrying about differences between operating systems, library dependencies, compiling the code, etc. Install VirtualBox This is the software that runs the virtual machine. You can find the download link and instructions at http://www.virtualbox.org. NOTE: On Windows you may see a warning about Logo testing, it is okay to ignore the warning. Download the PEcAn VM You can find the download link at http://opensource.ncsa.illinois.edu/projects/artifacts.php?key=PECAN, under the “Files” header. Click the “.ova” file to begin the download. Note that the file is ~7 GB, so this download can take several minutes to hours depending on your connection speed. Also, the VM requires >4 GB of RAM to operate correctly. Please check current usage of RAM and shutdown processes as needed. Import the VM Once the download is complete, open VirtualBox. In the VirtualBox menus, go to “File” → “Import Appliance” and locate the downloaded “.ova” file. For Virtualbox version 5.x: In the Appliance Import Settings, make sure you select “Reinitialize the MAC address of all network cards” (picture below). This is not selected by default and can result in networking issues since multiple machines might claim to have the same network MAC Address. For Virtualbox versions starting with 6.0, there is a slightly different interface (see figure). Select “Generate new MAC addresses for all network adapters” from the MAC Address Policy: NOTE: If you experience network connection difficulties in the VM with this enabled, try re-importing the VM without this setting selected). Finally, click “Import” to build the Virtual Machine from its image. Launch PEcAn Double click the icon for the PEcAn VM. A terminal window will pop up showing the machine booting up which may take a minute. It is done booting when you get to the pecan login: prompt. You do not need to login as the VM behaves like a server that we will be accessing through you web browser. Feel free to minimize the VM window. If you do want to login to the VM, the credentials are as follows: username: carya, password: illinois (after the pecan tree, [Carya illinoinensis][pecan-wikipedia]). Open the PEcAn web interface With the VM running in the background, open any web browser on the same machine and navigate to localhost:6480/pecan/ to start the PEcAn workflow. (NOTE: The trailing backslash may be necessary depending on your browser) To ssh into the VM, open up a terminal on your machine and execute ssh -l carya -p 6422 localhost. Username and password are the same as when you log into the machine. "],["aws-setup.html", "24.2 AWS Setup", " 24.2 AWS Setup ***********Mirror of earlier section in installation section?********************* The following are Mike’s rough notes from a first attempt to port the PEcAn VM to the AWS. This was done on a Mac These notes are based on following the instructions here 24.2.1 Convert PEcAn VM AWS allows upload of files as VMDK but the default PEcAn VM is in OVA format If you haven’t done so already, download the PEcAn VM Split the OVA file into OVF and VMDK files tar xf <ovafile> 24.2.2 Set up an account on AWS After you have an account you need to set up a user and save your access key and secret key In my case I created a user named ‘carya’ Note: the key that ended up working had to be made at https://console.aws.amazon.com/iam/home#security_credential, not the link above. 24.2.3 Install EC2 command line tools wget http://s3.amazonaws.com/ec2-downloads/ec2-api-tools.zip sudo mkdir /usr/local/ec2 sudo unzip ec2-api-tools.zip -d /usr/local/ec2 If need be, download and install JDK export JAVA_HOME=$(/usr/libexec/java_home) export EC2_HOME=/usr/local/ec2/ec2-api-tools-<version> export PATH=$PATH:$EC2_HOME/bin Then set your user credentials as environment variables: export AWS_ACCESS_KEY=xxxxxxxxxxxxxx export AWS_SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxx Note: you may want to add all the variables set in the above EXPORT commands above into your .bashrc or equivalent. 24.2.4 Create an AWS S3 ‘bucket’ to upload VM to Go to https://console.aws.amazon.com/s3 and click “Create Bucket” In my case I named the bucket ‘pecan’ 24.2.5 Upload In the code below, make sure to change the PEcAn version, the name of the bucket, and the name of the region. Make sure that the PEcAn version matches the one you downloaded. Also, you may want to choose a considerably larger instance type. The one chosen below is that corresponding to the AWS Free Tier ec2-import-instance PEcAn_1.2.6-disk1.vmdk --instance-type t2.micro --format VMDK --architecture x86_64 --platform Linux --bucket pecan --region us-east-1 --owner-akid $AWS_ACCESS_KEY --owner-sak $AWS_SECRET_KEY Make sure to note the ID of the image since you’ll need it to check the VM status. Once the image is uploaded it will take a while (typically about an hour) for Amazon to convert the image to one it can run. You can check on this progress by running ec2-describe-conversion-tasks <image.ID> 24.2.6 Configuring the VM On the EC2 management webpage, https://console.aws.amazon.com/ec2, if you select Instances on the left hand side (LHS) you should be able to see your new PEcAn image as an option under Launch Instance. Before launching, you will want to update the firewall to open up additional ports that PEcAn needs – specifically port 80 for the webpage. Port 22 (ssh/sftp) should be open by default. Under “Security Groups” select “Inbound” then “Edit” and then add “HTTP”. Select “Elastic IPs” on the LHS, and “Allocate New Address” in order to create a public IP for your VM. Next, select “Network Interfaces” on the LHS and then under Actions select “Associate Addresses” then choose the Elastic IP you just created. See also http://docs.aws.amazon.com/AmazonVPC/latest/GettingStartedGuide/GetStarted.html 24.2.7 Set up multiple instances (optional) For info on setting up multiple instances with load balancing see: http://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/gs-ec2VPC.html Select “Load Balancers” on the LHS, click on “Create Load Balancer”, follow Wizard keeping defaults. To be able to launch multiple VMs: Under “Instances” convert VM to an Image. When done, select Launch, enable multiple instances, and associate with the previous security group. Once running, go back to “Load Balancers” and add the instances to the load balancer. Each instance can be accessed individually by it’s own public IP, but external users should access the system more generally via the Load Balancers DNS. 24.2.8 Booting the VM Return to “Instances” using the menu on the LHS. To boot the VM select “Actions” then “Instance State” then “Start”. In the future, once you have the VM loaded and configured this last step is the only one you will need to repeat to turn your VM on and off. The menu provided should specify the Public IP where the VM has launched "],["shiny-setup.html", "24.3 Shiny Setup", " 24.3 Shiny Setup Installing and configuring Shiny for PEcAn authors - Alexey Shiklomanov - Rob Kooper NOTE: Instructions are only tested for CentOS 6.5 and Ubuntu 16.04 NOTE: Pretty much every step here requires root access. 24.3.1 Install the Shiny R package and Shiny server Follow the instructions on the Shiny download page for the operating system you are using. 24.3.2 Modify the shiny configuration file The Shiny configuration file is located in /etc/shiny-server/shiny-server.conf. Comment out the entire file and add the following, replacing <username> with your user name and <location> with the URL location you want for your app. This will allow you to run Shiny apps from your web browser at https://your.server.edu/shiny/your-location run as shiny; server { listen 3838; location /<location>/ { run as <username>; site_dir /path/to/your/shiny/app; log_dir /var/log/shiny-server; directory_index on; } } For example, my configuration on the old test-pecan looks like this. run as shiny; server { listen 3838; location /ashiklom/ { run as ashiklom; site_dir /home/ashiklom/fs-data/pecan/shiny/; log_dir /var/log/shiny-server; directory_index on; } } …and I can access my Shiny apps at, for instance, https://test-pecan.bu.edu/shiny/ashiklom/workflowPlots. You can add as many location <loc> { ... } fields as you would like. run as shiny; server { listen 3838; location /ashiklom/ { ... } location /bety/ { ... } } If you change the configuration, for example to add a new location, you will need to restart Shiny server. If you are setting up a new instance of Shiny, skip this step and continue with the guide, since there are a few more steps to get Shiny working. If there is an instance of Shiny already running, you can restart it with: ## On CentOS sudo service shiny-server stop sudo service shiny-server start ## On Ubuntu sudo systemctl stop shiny-server.service sudo systemctl start shiny-server.service 24.3.3 Set the Apache proxy Create a file with the following name, based on the version of the operating system you are using: Ubuntu 16.04 (pecan1, pecan2, test-pecan) – /etc/apache2/conf-available/shiny.conf CentOS 6.5 (psql-pecan) – /etc/httpd/conf.d/shiny.conf Into this file, add the following: ProxyPass /shiny/ http://localhost:3838/ ProxyPassReverse /shiny/ http://localhost:3838/ RedirectMatch permanent ^/shiny$ /shiny/ 24.3.3.1 Ubuntu only: Enable the new shiny configuration sudo a2enconf shiny This will create a symbolic link to the newly created shiny.conf file inside the /etc/apache2/conf-enabled directory. You can do ls -l /etc/apache2/conf-enabled to confirm that this worked. 24.3.4 Enable and start the shiny server, and restart apache 24.3.4.1 On CentOS sudo ln -s /opt/shiny-server/config/init.d/redhat/shiny-server /etc/init.d sudo service shiny-server stop sudo service shiny-server start sudo service httpd restart You can check that Shiny is running with service shiny-server status. 24.3.4.2 On Ubuntu Enable the Shiny server service. This will make sure Shiny runs automatically on startup. sudo systemctl enable shiny-server.service Restart Apache. sudo apachectl restart Start the Shiny server. sudo systemctl start shiny-server.service If there are problems, you can stop the shiny-server.service with… sudo systemctl stop shiny-server.service …and then use start again to restart it. 24.3.5 Troubleshooting Refer to the log files for shiny (/var/log/shiny-server.log) and httpd (on CentOS, /var/log/httpd/error-log; on Ubuntu, /var/log/apache2/error-log). 24.3.6 Further reading Shiny server configuration reference "],["thredds-setup.html", "24.4 Thredds Setup", " 24.4 Thredds Setup Installing and configuring Thredds for PEcAn authors - Rob Kooper NOTE: Instructions are only tested for Ubuntu 16.04 on the VM, if you have instructions for CENTOS/RedHat please update this documentation NOTE: Pretty much every step here requires root access. 24.4.1 Install the Tomcat 8 and Thredds webapp The Tomcat 8 server can be installed from the default Ubuntu repositories. The thredds webapp will be downloaded and installed from unidata. First step is to install Tomcat 8 and configure it. The flag -Dtds.content.root.path should point to the location of where the thredds folder is located. This needs to be writeable by the user for tomcat. -Djava.security.egd is a special flag to use a different random number generator for tomcat. The default would take to long to generate a random number. apt-get -y install tomcat8 openjdk-8-jdk echo JAVA_OPTS=\\"-Dtds.content.root.path=/home/carya \\${JAVA_OPTS}\\" >> /etc/default/tomcat8 echo JAVA_OPTS=\\"-Djava.security.egd=file:/dev/./urandom \\${JAVA_OPTS}\\" >> /etc/default/tomcat8 service tomcat8 restart Next is to install the webapp. mkdir /home/carya/thredds chmod 777 /home/carya/thredds wget -O /var/lib/tomcat8/webapps/thredds.war ftp://ftp.unidata.ucar.edu/pub/thredds/4.6/current/thredds.war Finally we configure Apache to prox the thredds server cat > /etc/apache2/conf-available/thredds.conf << EOF ProxyPass /thredds/ http://localhost:8080/thredds/ ProxyPassReverse /thredds/ http://localhost:8080/thredds/ RedirectMatch permanent ^/thredds$ /thredds/ EOF a2enmod proxy_http a2enconf thredds service apache2 reload 24.4.1.1 Customize the Thredds server To customize the thredds server for your installation edit the file in /home/carya/thredds/threddsConfig.xml. For example the following file is included in the VM. <?xml version="1.0" encoding="UTF-8"?> <threddsConfig> <!-- all options are commented out in standard install - meaning use default values --> <!-- see http://www.unidata.ucar.edu/software/thredds/current/tds/reference/ThreddsConfigXMLFile.html --> <serverInformation> <name>PEcAn</name> <logoUrl>/pecan/images/pecan_small.jpg</logoUrl> <logoAltText>PEcAn</logoAltText> <abstract>Scientific Data</abstract> <keywords>meteorology, atmosphere, climate, ocean, earth science</keywords> <contact> <name>Rob Kooper</name> <organization>NCSA</organization> <email>kooper@illinois.edu</email> <!--phone></phone--> </contact> <hostInstitution> <name>PEcAn</name> <webSite>http://www.pecanproject.org/</webSite> <logoUrl>/pecan/images/pecan_small.jpg</logoUrl> <logoAltText>PEcAn Project</logoAltText> </hostInstitution> </serverInformation> <!-- The <catalogRoot> element: For catalogs you don't want visible from the /thredds/catalog.xml chain of catalogs, you can use catalogRoot elements. Each catalog root config catalog is crawled and used in configuring the TDS. <catalogRoot>myExtraCatalog.xml</catalogRoot> <catalogRoot>myOtherExtraCatalog.xml</catalogRoot> --> <!-- * Setup for generated HTML pages. * * NOTE: URLs may be absolute or relative, relative URLs must be relative * to the webapp URL, i.e., http://server:port/thredds/. --> <htmlSetup> <!-- * CSS documents used in generated HTML pages. * The CSS document given in the "catalogCssUrl" element is used for all pages * that are HTML catalog views. The CSS document given in the "standardCssUrl" * element is used in all other generated HTML pages. * --> <standardCssUrl>tds.css</standardCssUrl> <catalogCssUrl>tdsCat.css</catalogCssUrl> <openDapCssUrl>tdsDap.css</openDapCssUrl> <!-- * The Google Analytics Tracking code you would like to use for the * webpages associated with THREDDS. This will not track WMS or DAP * requests for data, only browsing the catalog. --> <googleTrackingCode></googleTrackingCode> </htmlSetup> <!-- The <TdsUpdateConfig> element controls if and how the TDS checks for updates. The default is for the TDS to check for the current stable and development release versions, and to log that information in the TDS serverStartup.log file as INFO entries. <TdsUpdateConfig> <logVersionInfo>true</logVersionInfo> </TdsUpdateConfig> --> <!-- The <CORS> element controls Cross-Origin Resource Sharing (CORS). CORS is a way to allow a website (such as THREDDS) to open up access to resources to web pages and applications running on a different domain. One example would be allowing a web-application to use fonts from a separate host. For TDS, this can allow a javascript app running on a different site to access data on a THREDDS server. For more information see: https://en.wikipedia.org/wiki/Cross-origin_resource_sharing The elements below represent defaults. Only the <enabled> tag is required to enable CORS. The default allowed origin is '*', which allows sharing to any domain. <CORS> <enabled>false</enabled> <maxAge>1728000</maxAge> <allowedMethods>GET</allowedMethods> <allowedHeaders>Authorization</allowedHeaders> <allowedOrigin>*</allowedOrigin> </CORS> --> <!-- The <CatalogServices> element: - Services on local TDS served catalogs are always on. - Services on remote catalogs are set with the allowRemote element below. They are off by default (recommended). --> <CatalogServices> <allowRemote>false</allowRemote> </CatalogServices> <!-- Configuring the CDM (netcdf-java library) see http://www.unidata.ucar.edu/software/netcdf-java/reference/RuntimeLoading.html <nj22Config> <ioServiceProvider class="edu.univ.ny.stuff.FooFiles"/> <coordSysBuilder convention="foo" class="test.Foo"/> <coordTransBuilder name="atmos_ln_sigma_coordinates" type="vertical" class="my.stuff.atmosSigmaLog"/> <typedDatasetFactory datatype="Point" class="gov.noaa.obscure.file.Flabulate"/> </nj22Config> --> <!-- CDM uses the DiskCache directory to store temporary files, like uncompressed files. <DiskCache> <alwaysUse>false</alwaysUse> <scour>1 hour</scour> <maxSize>1 Gb</maxSize> </DiskCache> --> <!-- Caching open NetcdfFile objects. default is to allow 50 - 100 open files, cleanup every 11 minutes <NetcdfFileCache> <minFiles>50</minFiles> <maxFiles>100</maxFiles> <scour>11 min</scour> </NetcdfFileCache> --> <!-- The <HTTPFileCache> element: allow 10 - 20 open datasets, cleanup every 17 minutes used by HTTP Range requests. <HTTPFileCache> <minFiles>10</minFiles> <maxFiles>20</maxFiles> <scour>17 min</scour> </HTTPFileCache> --> <!-- Writing GRIB indexes. <GribIndexing> <setExtendIndex>false</setExtendIndex> <alwaysUseCache>false</alwaysUseCache> </GribIndexing> --> <!-- Persist joinNew aggregations to named directory. scour every 24 hours, delete stuff older than 90 days <AggregationCache> <scour>24 hours</scour> <maxAge>90 days</maxAge> <cachePathPolicy>NestedDirectory</cachePathPolicy> </AggregationCache> --> <!-- How to choose the template dataset for an aggregation. latest, random, or penultimate <Aggregation> <typicalDataset>penultimate</typicalDataset> </Aggregation> --> <!-- The Netcdf Subset Service is off by default. <NetcdfSubsetService> <allow>false</allow> <scour>10 min</scour> <maxAge>-1 min</maxAge> </NetcdfSubsetService> --> <!-- <Opendap> <ascLimit>50</ascLimit> <binLimit>500</binLimit> <serverVersion>opendap/3.7</serverVersion> </Opendap> --> <!-- The WCS Service is off by default. Also, off by default (and encouraged) is operating on a remote dataset. <WCS> <allow>false</allow> <allowRemote>false</allowRemote> <scour>15 min</scour> <maxAge>30 min</maxAge> </WCS> --> <!-- <WMS> <allow>false</allow> <allowRemote>false</allowRemote> <maxImageWidth>2048</maxImageWidth> <maxImageHeight>2048</maxImageHeight> </WMS> --> <!-- <NCISO> <ncmlAllow>false</ncmlAllow> <uddcAllow>false</uddcAllow> <isoAllow>false</isoAllow> </NCISO> --> <!-- CatalogGen service is off by default. <CatalogGen> <allow>false</allow> </CatalogGen> --> <!-- DLwriter service is off by default. As is support for operating on remote catalogs. <DLwriter> <allow>false</allow> <allowRemote>false</allowRemote> </DLwriter> --> <!-- DqcService is off by default. <DqcService> <allow>false</allow> </DqcService> --> <!-- Link to a Viewer application on the HTML page: <Viewer>my.package.MyViewer</Viewer> --> <!-- Add a DataSource - essentially an IOSP with access to Servlet request parameters <datasetSource>my.package.DatsetSourceImpl</datasetSource> --> <!-- set FeatureCollection logging <FeatureCollection> <RollingFileAppender> <MaxFileSize>1 MB</MaxFileSize> <MaxBackups>5</MaxBackups> <Level>INFO</Level> </RollingFileAppender> </FeatureCollection> --> <!-- Configure how the NetCDF-4 C library is discovered and used. libraryPath: The directory in which the native library is installed. libraryName: The name of the native library. This will be used to locate the proper .DLL, .SO, or .DYLIB file within the libraryPath directory. useForReading: By default, the native library is only used for writing NetCDF-4 files; a pure-Java layer is responsible for reading them. However, if this property is set to true, then it will be used for reading NetCDF-4 (and HDF5) files as well. --> <!-- <Netcdf4Clibrary> <libraryPath>/usr/local/lib</libraryPath> <libraryName>netcdf</libraryName> <useForReading>false</useForReading> </Netcdf4Clibrary> --> </threddsConfig> 24.4.2 Update the catalog For example to update the catalog with the latest data, run the following command from the root crontab. This cronjob will also synchronize the database with remote servers and dump your database (by default in /home/carya/dump) 0 * * * * /home/carya/pecan/scripts/cron.sh -o /home/carya/dump 24.4.3 Troubleshooting Refer to the log files for Tomcat (/var/log/tomcat8/*) and Thredds (/home/carya/thredds/logs). 24.4.4 Further reading Thredds reference "],["osinstall.html", "24.5 OS Specific Installations", " 24.5 OS Specific Installations Ubuntu CentOS OSX Install BETY THIS PAGE IS DEPRECATED Install Models Install Data 24.5.1 Ubuntu These are specific notes for installing PEcAn on Ubuntu (14.04) and will be referenced from the main installing PEcAn page. You will at least need to install the build environment and Postgres sections. If you want to access the database/PEcAn using a web browser you will need to install Apache. To access the database using the BETY interface, you will need to have Ruby installed. This document also contains information on how to install the Rstudio server edition as well as any other packages that can be helpful. 24.5.1.1 Install build environment sudo -s # point to latest R echo "deb http://cran.rstudio.com/bin/linux/ubuntu `lsb_release -s -c`/" > /etc/apt/sources.list.d/R.list apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E084DAB9 # update package list apt-get -y update # install packages needed for PEcAn apt-get -y install build-essential gfortran git r-base-core r-base-dev jags liblapack-dev libnetcdf-dev netcdf-bin bc libcurl4-gnutls-dev curl udunits-bin libudunits2-dev libgmp-dev python-dev libgdal1-dev libproj-dev expect # install packages needed for ED2 apt-get -y install openmpi-bin libopenmpi-dev # install requirements for DALEC apt-get -y install libgsl0-dev # install packages for webserver apt-get -y install apache2 libapache2-mod-php5 php5 # install packages to compile docs apt-get -y install texinfo texlive-latex-base texlive-latex-extra texlive-fonts-recommended # install devtools echo 'install.packages("devtools")' | R --vanilla # done as root exit 24.5.1.2 Install Postgres Documentation: http://trac.osgeo.org/postgis/wiki/UsersWikiPostGIS21UbuntuPGSQL93Apt sudo -s # point to latest PostgreSQL echo "deb http://apt.postgresql.org/pub/repos/apt `lsb_release -s -c`-pgdg main" > /etc/apt/sources.list.d/pgdg.list wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - # update package list apt-get -y update # install packages for postgresql (using a newer version than default) apt-get -y install libdbd-pgsql postgresql postgresql-client libpq-dev postgresql-9.4-postgis-2.1 postgresql-9.4-postgis-2.1-scripts # install following if you want to run pecan through the web apt-get -y install php5-pgsql # enable bety user to login with trust by adding the following lines after # the ability of postgres user to login in /etc/postgresql/9.4/main/pg_hba.conf local all bety trust host all bety 127.0.0.1/32 trust host all bety ::1/128 trust # Once done restart postgresql /etc/init.d/postgresql restart exit To install the BETYdb database .. 24.5.1.3 Apache Configuration PEcAn # become root sudo -s # get index page rm /var/www/html/index.html ln -s ${HOME}/pecan/documentation/index_vm.html /var/www/html/index.html # setup a redirect cat > /etc/apache2/conf-available/pecan.conf << EOF Alias /pecan ${HOME}/pecan/web <Directory ${HOME}/pecan/web> DirectoryIndex index.php Options +ExecCGI Require all granted </Directory> EOF a2enconf pecan /etc/init.d/apache2 restart # done as root exit 24.5.1.4 Apache Configuration BETY sudo -s # install all ruby related packages apt-get -y install ruby2.0 ruby2.0-dev libapache2-mod-passenger # link static content ln -s ${HOME}/bety/public /var/www/html/bety # setup a redirect cat > /etc/apache2/conf-available/bety.conf << EOF RailsEnv production RailsBaseURI /bety PassengerRuby /usr/bin/ruby2.0 <Directory /var/www/html/bety> Options +FollowSymLinks Require all granted </Directory> EOF a2enconf bety /etc/init.d/apache2 restart 24.5.1.5 Rstudio-server NOTE This will allow anybody to login to the machine through the rstudio interface and run any arbitrary code. The login used however is the same as the system login/password. wget http://download2.rstudio.org/rstudio-server-0.98.1103-amd64.deb # bceome root sudo -s # install required packages apt-get -y install libapparmor1 apparmor-utils libssl0.9.8 # install rstudio dpkg -i rstudio-server-* rm rstudio-server-* echo "www-address=127.0.0.1" >> /etc/rstudio/rserver.conf echo "r-libs-user=~/R/library" >> /etc/rstudio/rsession.conf rstudio-server restart # setup rstudio forwarding in apache a2enmod proxy_http cat > /etc/apache2/conf-available/rstudio.conf << EOF ProxyPass /rstudio/ http://localhost:8787/ ProxyPassReverse /rstudio/ http://localhost:8787/ RedirectMatch permanent ^/rstudio$ /rstudio/ EOF a2enconf rstudio /etc/init.d/apache2 restart # all done, exit root exit 24.5.1.6 Additional packages HDF5 Tools, netcdf, GDB and emacs sudo apt-get -y install hdf5-tools cdo nco netcdf-bin ncview gdb emacs ess nedit 24.5.2 CentOS/RedHat These are specific notes for installing PEcAn on CentOS (7) and will be referenced from the main installing PEcAn page. You will at least need to install the build environment and Postgres sections. If you want to access the database/PEcAn using a web browser you will need to install Apache. To access the database using the BETY interface, you will need to have Ruby installed. This document also contains information on how to install the Rstudio server edition as well as any other packages that can be helpful. 24.5.2.1 Install build environment sudo -s # install packages needed for PEcAn yum -y groupinstall 'Development Tools' yum -y install git netcdf-fortran-openmpi-devel R bc curl libxml2-devel openssl-devel ed udunits2 udunits2-devel netcdf netcdf-devel gmp-devel python-devel gdal-devel proj-devel proj-epsg expect # jags yum -y install http://download.opensuse.org/repositories/home:/cornell_vrdc/CentOS_7/x86_64/jags3-3.4.0-54.1.x86_64.rpm yum -y install http://download.opensuse.org/repositories/home:/cornell_vrdc/CentOS_7/x86_64/jags3-devel-3.4.0-54.1.x86_64.rpm # fix include folder for udunits2 ln -s /usr/include/udunits2/* /usr/include/ # install packages needed for ED2 yum -y install environment-modules openmpi-bin libopenmpi-dev # install requirements for DALEC yum -y install gsl-devel # install packages for webserver yum -y install httpd php systemctl enable httpd systemctl start httpd firewall-cmd --zone=public --add-port=80/tcp --permanent firewall-cmd --reload # install packages to compile docs #apt-get -y install texinfo texlive-latex-base texlive-latex-extra texlive-fonts-recommended # install devtools echo 'install.packages("devtools")' | R --vanilla # done as root exit echo "module load mpi" >> ~/.bashrc module load mpi 24.5.2.2 Install and configure PostgreSQL, udunits2, NetCDF sudo -s # point to latest PostgreSQL yum install -y epel-release yum -y install http://yum.postgresql.org/9.4/redhat/rhel-7-x86_64/pgdg-centos94-9.4-1.noarch.rpm # install packages for postgresql (using a newer version than default) yum -y install postgresql94-server postgresql94-contrib postgis2_94 postgresql94-devel udunits2 netcdf # install following if you want to run pecan through the web yum -y install php-pgsql # enable bety user to login with trust by adding the following lines after # the ability of postgres user to login in /var/lib/pgsql/9.4/data/pg_hba.conf local all bety trust host all bety 127.0.0.1/32 trust host all bety ::1/128 trust # Create database /usr/pgsql-9.4/bin/postgresql94-setup initdb # Enable postgres systemctl enable postgresql-9.4 systemctl start postgresql-9.4 exit 24.5.2.3 Apache Configuration PEcAn Install and Start Apache yum -y install httpd systemctl enable httpd systemctl start httpd # become root sudo -s # get index page rm /var/www/html/index.html ln -s /home/carya/pecan/documentation/index_vm.html /var/www/html/index.html # fix selinux context (does this need to be done after PEcAn is installed?) chcon -R -t httpd_sys_content_t /home/carya/pecan /home/carya/output # setup a redirect cat > /etc/httpd/conf.d/pecan.conf << EOF Alias /pecan /home/carya/pecan/web <Directory /home/carya/pecan/web> DirectoryIndex index.php Options +ExecCGI Require all granted </Directory> EOF a2enconf pecan /etc/init.d/apache2 restart # done as root exit 24.5.2.4 Apache Configuration BETY sudo -s # install all ruby related packages sudo curl --fail -sSLo /etc/yum.repos.d/passenger.repo https://oss-binaries.phusionpassenger.com/yum/definitions/el-passenger.repo yum -y install ruby ruby-devel mod_passenger # link static content ln -s /home/carya/bety/public /var/www/html/bety # fix GemFile echo 'gem "test-unit"' >> bety/Gemlile # fix selinux context (does this need to be done after bety is installed?) chcon -R -t httpd_sys_content_t /home/carya/bety # setup a redirect cat > /etc/httpd/conf.d/bety.conf << EOF RailsEnv production RailsBaseURI /bety PassengerRuby /usr/bin/ruby <Directory /var/www/html/bety> Options +FollowSymLinks Require all granted </Directory> EOF systemctl restart httpd 24.5.2.5 Rstudio-server NEED FIXING NOTE This will allow anybody to login to the machine through the rstudio interface and run any arbitrary code. The login used however is the same as the system login/password. wget http://download2.rstudio.org/rstudio-server-0.98.1103-amd64.deb # bceome root sudo -s # install required packages apt-get -y install libapparmor1 apparmor-utils libssl0.9.8 # install rstudio dpkg -i rstudio-server-* rm rstudio-server-* echo "www-address=127.0.0.1" >> /etc/rstudio/rserver.conf echo "r-libs-user=~/R/library" >> /etc/rstudio/rsession.conf rstudio-server restart # setup rstudio forwarding in apache a2enmod proxy_http cat > /etc/apache2/conf-available/rstudio.conf << EOF ProxyPass /rstudio/ http://localhost:8787/ ProxyPassReverse /rstudio/ http://localhost:8787/ RedirectMatch permanent ^/rstudio$ /rstudio/ EOF a2enconf rstudio /etc/init.d/apache2 restart # all done, exit root exit Alternative Rstudio instructions 24.5.2.5.1 Install and configure Rstudio-server Install RStudio Server by following the official documentation for your platform. Then, proceed with the following: add PATH=$PATH:/usr/sbin:/sbin to /etc/profile cat "PATH=$PATH:/usr/sbin:/sbin; export PATH" >> /etc/profile add rstudio.conf to /etc/httpd/conf.d/ wget https://gist.github.com/dlebauer/6921889/raw/d1e0f945228e5519afa6223d6f49d6e0617262bd/rstudio.conf sudo mv rstudio.conf /httpd/conf.d/ restart the Apache server: sudo httpd restart now you should be able to access http://<server>/rstudio 24.5.2.6 Install ruby-netcdf gem cd $RUBY_APPLICATION_HOME export $NETCDF_URL=http://www.gfd-dennou.org/arch/ruby/products/ruby-netcdf/release/ruby-netcdf-0.6.6.tar.gz export $NETCDF_DIR=/usr/local/netcdf gem install narray export NARRAY_DIR="$(ls $GEM_HOME/gems | grep 'narray-')" export NARRAY_PATH="$GEM_HOME/gems/$NARRAY_DIR" cd $MY_RUBY_HOME/bin wget $NETCDF_URL -O ruby-netcdf.tgz tar zxf ruby-netcdf.tgz && cd ruby-netcdf-0.6.6/ ruby -rubygems extconf.rb --with-narray-include=$NARRAY_PATH --with-netcdf-dir=/usr/local/netcdf-4.3.0 sed -i 's|rb/$|rb|' Makefile make make install cd ../ && sudo rm -rf ruby-netcdf* cd $RUBY_APPLICATION bundle install --without development 24.5.2.7 Additional packages NEED FIXING HDF5 Tools, netcdf, GDB and emacs sudo apt-get -y install hdf5-tools cdo nco netcdf-bin ncview gdb emacs ess nedit 24.5.3 Mac OSX These are specific notes for installing PEcAn on Mac OSX and will be referenced from the main installing PEcAn page. You will at least need to install the build environment and Postgres sections. If you want to access the database/PEcAn using a web browser you will need to install Apache. To access the database using the BETY interface, you will need to have Ruby installed. This document also contains information on how to install the Rstudio server edition as well as any other packages that can be helpful. 24.5.3.1 Install build environment # install R # download from http://cran.r-project.org/bin/macosx/ # install gfortran # download from http://cran.r-project.org/bin/macosx/tools/ # install OpenMPI curl -o openmpi-1.6.3.tar.gz http://www.open-mpi.org/software/ompi/v1.6/downloads/openmpi-1.6.3.tar.gz tar zxf openmpi-1.6.3.tar.gz cd openmpi-1.6.3 ./configure --prefix=/usr/local make all sudo make install cd .. # install szip curl -o szip-2.1-MacOSX-intel.tar.gz ftp://ftp.hdfgroup.org/lib-external/szip/2.1/bin/szip-2.1-MacOSX-intel.tar.gz tar zxf szip-2.1-MacOSX-intel.tar.gz sudo mv szip-2.1-MacOSX-intel /usr/local/szip # install HDF5 curl -o hdf5-1.8.11.tar.gz http://www.hdfgroup.org/ftp/HDF5/current/src/hdf5-1.8.11.tar.gz tar zxf hdf5-1.8.11.tar.gz cd hdf5-1.8.11 sed -i -e 's/-O3/-O0/g' config/gnu-flags ./configure --prefix=/usr/local/hdf5 --enable-fortran --enable-cxx --with-szlib=/usr/local/szip make # make check sudo make install # sudo make check-install cd .. 24.5.3.2 Install Postgres For those on a Mac I use the following app for postgresql which has postgis already installed (http://postgresapp.com/) To get postgis run the following commands in psql: ##### Enable PostGIS (includes raster) CREATE EXTENSION postgis; ##### Enable Topology CREATE EXTENSION postgis_topology; ##### fuzzy matching needed for Tiger CREATE EXTENSION fuzzystrmatch; ##### Enable US Tiger Geocoder CREATE EXTENSION postgis_tiger_geocoder; To check your postgis run the following command again in psql: SELECT PostGIS_full_version(); 24.5.3.3 Additional installs 24.5.3.3.1 Install JAGS Download JAGS from http://sourceforge.net/projects/mcmc-jags/files/JAGS/3.x/Mac%20OS%20X/JAGS-Mavericks-3.4.0.dmg/download 24.5.3.3.2 Install udunits Installing udunits-2 on MacOSX is done from source. download most recent version of Udunits here instructions for compiling from source curl -o udunits-2.1.24.tar.gz ftp://ftp.unidata.ucar.edu/pub/udunits/udunits-2.1.24.tar.gz tar zxf udunits-2.1.24.tar.gz cd udunits-2.1.24 ./configure make sudo make install 24.5.3.4 Apache Configuration Mac does not support pdo/postgresql by default. The easiest way to install is use: http://php-osx.liip.ch/ To enable pecan to run from your webserver. cat > /etc/apache2/others/pecan.conf << EOF Alias /pecan ${PWD}/pecan/web <Directory ${PWD}/pecan/web> DirectoryIndex index.php Options +All Require all granted </Directory> EOF 24.5.3.5 Ruby The default version of ruby should work. Or use JewelryBox. 24.5.3.6 Rstudio Server For the mac you can download Rstudio Desktop. 24.5.4 Installing BETY **************THIS PAGE IS DEPRECATED************* Official Instructions for BETY are maintained here: https://pecan.gitbook.io/betydb-documentation If you would like to install the Docker Version of BETY, please consult the PEcAn Docker section. 24.5.4.1 Install Database + Data note To install BETYdb without PEcAn, first download the load.bety.sh script # install database (code assumes password is bety) sudo -u postgres createuser -d -l -P -R -S bety sudo -u postgres createdb -O bety bety sudo -u postgres ./scripts/load.bety.sh -c YES -u YES -r 0 sudo -u postgres ./scripts/load.bety.sh -r 1 sudo -u postgres ./scripts/load.bety.sh -r 2 # configure for PEcAn web app (change password if needed) cp web/config.example.php web/config.php # add models to database (VM only) ./scripts/add.models.sh # add data to database ./scripts/add.data.sh # create outputs folder mkdir ~/output chmod 777 ~/output 24.5.4.2 Installing BETYdb Web Application There are two flavors of BETY, PHP and RUBY. The PHP version allows for a minimal interaction with the database while the RUBY version allows for full interaction with the database. 24.5.4.2.1 PHP version The php version comes with PEcAn and is already configured. 24.5.4.2.2 RUBY version The RUBY version requires a few extra packages to be installed first. Next we install the web app. # install bety cd git clone https://github.com/PecanProject/bety.git # install gems cd bety sudo gem2.0 install bundler bundle install --without development:test:javascript_testing:debug and configure BETY # create folders for upload folders mkdir paperclip/files paperclip/file_names chmod 777 paperclip/files paperclip/file_names # create folder for log files mkdir log touch log/production.log chmod 0666 log/production.log # fix configuration for vm cp config/additional_environment_vm.rb config/additional_environment.rb chmod go+w public/javascripts/cache/ # setup bety database configuration cat > config/database.yml << EOF production: adapter: postgis encoding: utf-8 reconnect: false database: bety pool: 5 username: bety password: bety EOF # setup login tokens cat > config/initializers/site_keys.rb << EOF REST_AUTH_SITE_KEY = 'thisisnotasecret' REST_AUTH_DIGEST_STRETCHES = 10 EOF 24.5.5 Install Models This page contains instructions on how to download and install ecosystem models that have been or are being coupled to PEcAn. These instructions have been tested on the PEcAn unbuntu VM. Commands may vary on other operating systems. Also, some model downloads require permissions before downloading, making them unavailable to the general public. Please contact the PEcAn team if you would like access to a model that is not already installed on the default PEcAn VM. BioCro CLM 4.5 DALEC ED2 FATES GDAY JULES LINKAGES LPJ-GUESS MAESPA SIPNET 24.5.5.1 BioCro # Version 0.951 echo 'devtools::install_github("ebimodeling/biocro@0.951")' | R --vanilla # Version 1.x echo 'devtools::install_github("ebimodeling/biocro-dev@26e0125b")' | R --vanilla BioCro Developers: request access from @dlebauer on GitHub 24.5.5.2 CLM 4.5 The version of CLM installed on PEcAn is the ORNL branch provided by Dan Ricciuto. This version includes Dan’s point-level CLM processing scripts Download the code (~300M compressed), input data (1.7GB compressed and expands to 14 GB), and a few misc inputs. mkdir models cd models wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm4_5_1_r085.tar.gz wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm/ccsm_inputdata.tar.gz tar -xvzf clm4_5* tar -xvzf ccsm_inputdata.tar.gz #Parameter file: mkdir /home/carya/models/ccsm_inputdata/lnd/clm2/paramdata cd /home/carya/models/ccsm_inputdata/lnd/clm2/paramdata wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm_params.c130821.nc wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm_params.c140423.nc #Domain file: cd /home/carya/models/ccsm_inputdata/share/domains/domain.clm/ wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/domain.lnd.1x1pt_US-UMB_navy.nc #Aggregated met data file: cd /home/carya/models/ccsm_inputdata/atm/datm7/CLM1PT_data/1x1pt_US-UMB wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/all_hourly.nc ## lightning database cd /home/carya/models/ccsm_inputdata/atm/datm7/NASA_LIS/ wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clmforc.Li_2012_climo1995-2011.T62.lnfm_Total_c140423.nc ## surface data cd /home/carya/models/ccsm_inputdata/lnd/clm2/surfdata wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm/surfdata_360x720cru_simyr1850_c130927.nc cd /home/carya/models/ccsm_inputdata/lnd/clm2/surfdata_map wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm/surfdata_1x1pt_US-UMB_I1850CLM45CN_simyr1850.nc_new mv surfdata_1x1pt_US-UMB_I1850CLM45CN_simyr1850.nc_new surfdata_1x1pt_US-UMB_I1850CLM45CN_simyr1850.nc Required libraries sudo apt-get install mercurial csh tcsh subversion cmake sudo ln -s /usr/bin/make /usr/bin/gmake Compile and build default inputs cd ~/carya/models/clm4_5_1_r085/scripts python runCLM.py --site US-UMB ––compset I1850CLM45CN --mach ubuntu --ccsm_input /home/carya/models/ccsm_inputdata --tstep 1 --nopointdata --coldstart --cpl_bypass --clean_build 24.5.5.2.1 CLM Test Run You will see a new directory in scripts: US-UMB_I1850CLM45CN Enter this directory and run (you shouldn’t have to do this normally, but there is a bug with the python script and doing this ensures all files get to the right place): ./US-UMB_I1850CLM45CN.build Next you are ready to go to the run directory: /home/carya/models/clm4_5_1_r085/run/US-UMB_I1850CLM45CN/run Open to edit file: datm.streams.txt.CLM1PT.CLM_USRDAT and check file paths such that all paths start with /home/carya/models/ccsm_inputdata From this directory, launch the executable that resides in the bld directory: /home/carya/clm4_5_1_r085/run/US-UMB_I1850CLM45CN/bld/cesm.exe not sure this was the right location, but wherever the executable is You should begin to see output files that look like this: US-UMB_I1850CLM45CN.clm2.h0.yyyy-mm.nc (yyyy is year, mm is month) These are netcdf files containing monthly averages of lots of variables. The lnd_in file in the run directory can be modified to change the output file frequency and variables. 24.5.5.3 DALEC cd curl -o dalec_EnKF_pub.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/dalec_EnKF_pub.tgz tar zxf dalec_EnKF_pub.tgz rm dalec_EnKF_pub.tgz cd dalec_EnKF_pub make dalec_EnKF make dalec_seqMH sudo cp dalec_EnKF dalec_seqMH /usr/local/bin 24.5.5.4 ED2 24.5.5.4.1 ED2.2 r46 (used in PEcAn manuscript) # ---------------------------------------------------------------------- # Get version r46 with a few patches for ubuntu cd curl -o ED.r46.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r46.tgz tar zxf ED.r46.tgz rm ED.r46.tgz # ---------------------------------------------------------------------- # configure and compile ed cd ~/ED.r46/ED/build/bin curl -o include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.`uname -s` make OPT=VM sudo cp ../ed_2.1-VM /usr/local/bin/ed2.r46 Perform a test run using pre configured ED settings for ED2.2 r46 # ---------------------------------------------------------------------- # Create sample run cd mkdir testrun.ed.r46 cd testrun.ed.r46 curl -o ED2IN http://isda.ncsa.illinois.edu/~kooper/EBI/ED2IN.r46 sed -i -e "s#\\$HOME#$HOME#" ED2IN curl -o config.xml http://isda.ncsa.illinois.edu/~kooper/EBI/config.r46.xml # execute test run time ed2.r46 24.5.5.4.2 ED 2.2 r82 cd curl -o ED.r82.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r82.tgz tar zxf ED.r82.tgz rm ED.r82.tgz cd ED.r82 curl -o ED.r82.patch http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r82.patch patch -p1 < ED.r82.patch cd ED/build/bin curl -o include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.`uname -s` make OPT=VM sudo cp ../ed_2.1-VM /usr/local/bin/ed2.r82 Perform a test run using pre configured ED settings for ED2.2 r82 cd mkdir testrun.ed.r82 cd testrun.ed.r82 curl -o ED2IN http://isda.ncsa.illinois.edu/~kooper/EBI/ED2IN.r82 sed -i -e "s#\\$HOME#$HOME#" ED2IN curl -o config.xml http://isda.ncsa.illinois.edu/~kooper/EBI/config.r82.xml # execute test run time ed2.r82 24.5.5.4.3 ED 2.2 bleeding edge cd git clone https://github.com/EDmodel/ED2.git cd ED2/ED/build/bin curl -o include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.`uname -s` ./generate_deps.sh make OPT=VM sudo cp ../ed_2.1-VM /usr/local/bin/ed2.git 24.5.5.5 CLM-FATES Prerequisites sudo apt-get upgrade libnetcdf-dev sudo apt-get install subversion sudo apt-get install csh sudo apt-get install cmake sudo ln -s /usr/bin/make /usr/bin/gmake sudo rm /bin/sh sudo ln -s /bin/bash /bin/sh wget https://github.com/Unidata/netcdf-fortran/archive/v4.4.4.tar.gz cd netcdf-4.4.4 ./configure make sudo make install you might need to mess around with installing netcdf and netcdf-fortran to get a version FATES likes… Get code from Github (currently private) and go to cime/scripts directory git clone git@github.com:NGEET/ed-clm.git cd ed-clm/cime/scripts/ Within CLM-FATES, to be able to build an executable we need to create a reference run. We’ll also use this reference run to grab defaults from, so we’ll be registering the location of both the reference case (location of executable, scripts, etc) and the reference inputs with the PEcAn database. To begin, copy reference run script from pecan cp ~/pecan/models/fates/inst/create_1x1_ref_case.sh . Edit reference case script to set NETCDF_HOME, CROOT (reference run case), DIN_LOC_ROOT (reference run inputs). Also, make sure DIN_LOC_ROOT exists as FATES will not create it itself. Then run the script ./create_1x1_ref_case.sh Be aware that this script WILL ask you for your password on the NCAR server to download the reference case input data (the guest password may work, haven’t tried this). If it gives an error at the pio stage check the log, but the most likely error is it being unable to find a version of netcdf it likes. Once FATES is installed, set the whole reference case directory as the Model path (leave filename blank) and set the whole inputs directory as an Input with format clm_defaults. 24.5.5.6 GDAY Navigate to a directory you would like to store GDAY and run the following: git clone https://github.com/mdekauwe/GDAY.git cd GDAY cd src make gday is your executable. 24.5.5.7 JULES INSTALL STEPS: 1) Download JULES and FCM JULES: Model requires registration to download. Not to be put on PEcAn VM Getting Started documentation: https://jules.jchmr.org/content/getting-started Registration: http://jules-lsm.github.io/access_req/JULES_access.html FCM: https://github.com/metomi/fcm/ wget https://github.com/metomi/fcm/archive/2015.05.0.tar.gz edit makefile open etc/fcm-make/make.cfg set JULES_NETCDF = actual instead of dummy set path (e.g. /usr/) and lib_path /lib64 to netCDF libraries compile JULES cd etc/fcm-make/ {path.to.fcm}/fcm make -f etc/fcm-make/make.cfg --new UBUNTU VERSION: installed without having to add any perl libraries #perl stuff that I had to install on pecan2 not PEcAN VM sudo yum install perl-Digest-SHA sudo yum install perl-Time-modules sudo yum install cpan curl -L http://cpanmin.us | perl - --sudo App::cpanminus sudo cpanm Time/Piece.pm sudo cpanm IO/Uncompress/Gunzip.pm Executable is under build/bin/jules.exe Example rundir: examples/point_loobos 24.5.5.8 LINKAGES 24.5.5.8.1 R Installation # Public echo 'devtools::install_github("araiho/linkages_package")' | R --vanilla 24.5.5.8.2 FORTRAN VERSION #FORTRAN VERSION cd git clone https://github.com/araiho/Linkages.git cd Linkages gfortran -o linkages linkages.f sudo cp linkages /usr/local/bin/linkages.git 24.5.5.9 LPJ-GUESS Instructions to download source code Go to LPJ-GUESS website for instructions to access code. 24.5.5.10 MAESPA Navigate to a directory you would like store MAESPA and run the following: git clone https://bitbucket.org/remkoduursma/maespa.git cd maespa make maespa.out is your executable. Example input files can be found in the inpufiles directory. Executing measpa.out from within one of the example directories will produce output. MAESPA developers have also developed a wrapper package called Maeswrap. The usual R package installation method install.packages may present issues with downloading an unpacking a dependency package called rgl. Here are a couple of solutions: 24.5.5.10.1 Solution 1 ### From the Command Line sudo apt-get install r-cran-rgl then from within R install.packages("Maeswrap") 24.5.5.10.2 Solution 2 ### From the Command line sudo apt-get install libglu1-mesa-dev then from within R install.packages("Maeswrap") 24.5.5.11 SIPNET {inst-sipnet} cd curl -o sipnet_unk.tar.gz http://isda.ncsa.illinois.edu/~kooper/EBI/sipnet_unk.tar.gz tar zxf sipnet_unk.tar.gz rm sipnet_unk.tar.gz cd sipnet_unk make sudo cp sipnet /usr/local/bin/sipnet.runk 24.5.5.11.1 SIPNET testrun cd curl -o testrun.sipnet.tar.gz http://isda.ncsa.illinois.edu/~kooper/EBI/testrun.sipnet.tar.gz tar zxf testrun.sipnet.tar.gz rm testrun.sipnet.tar.gz cd testrun.sipnet sipnet.runk 24.5.6 Installing data for PEcAn PEcAn assumes some of the data to be installed on the machine. This page will describe how to install this data. 24.5.6.1 Site Information These are large-ish files that contain data used with ED2 and SIPNET rm -rf sites curl -o sites.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/sites.tgz tar zxf sites.tgz sed -i -e "s#/home/kooper/Projects/EBI#${PWD}#" sites/*/ED_MET_DRIVER_HEADER rm sites.tgz rm -rf inputs curl -o inputs.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/inputs.tgz tar zxf inputs.tgz rm inputs.tgz 24.5.6.2 FIA database FIA database is large and will add an extra 10GB to the installation. # download and install database curl -o fia5data.psql.gz http://isda.ncsa.illinois.edu/~kooper/EBI/fia5data.psql.gz dropdb --if-exists fia5data createdb -O bety fia5data gunzip fia5data.psql.gz psql -U bety -d fia5data < fia5data.psql rm fia5data.psql 24.5.6.3 Flux Camp Following will install the data for flux camp (as well as the demo script for PEcAn). cd curl -o plot.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/plot.tgz tar zxf plot.tgz rm plot.tgz 24.5.6.4 Harvard for ED tutorial Add datasets and runs curl -o Santarem_Km83.zip http://isda.ncsa.illinois.edu/~kooper/EBI/Santarem_Km83.zip unzip -d sites Santarem_Km83.zip sed -i -e "s#/home/pecan#${HOME}#" sites/Santarem_Km83/ED_MET_DRIVER_HEADER rm Santarem_Km83.zip curl -o testrun.s83.zip http://isda.ncsa.illinois.edu/~kooper/EBI/testrun.s83.zip unzip testrun.s83.zip sed -i -e "s#/home/pecan#${HOME}#" testrun.s83/ED2IN rm testrun.s83.zip curl -o ed2ws.harvard.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ed2ws.harvard.tgz tar zxf ed2ws.harvard.tgz mkdir ed2ws.harvard/analy ed2ws.harvard/histo sed -i -e "s#/home/pecan#${HOME}#g" ed2ws.harvard/input_harvard/met_driver/HF_MET_HEADER ed2ws.harvard/ED2IN ed2ws.harvard/*.r rm ed2ws.harvard.tgz curl -o testrun.PDG.zip http://isda.ncsa.illinois.edu/~kooper/EBI/testrun.PDG.zip unzip testrun.PDG.zip sed -i -e "s#/home/pecan#${HOME}#" testrun.PDG/Met/PDG_MET_DRIVER testrun.PDG/Template/ED2IN sed -i -e 's#/n/scratch2/moorcroft_lab/kzhang/PDG/WFire_Pecan/##' testrun.PDG/Template/ED2IN rm testrun.PDG.zip curl -o create_met_driver.tar.gz http://isda.ncsa.illinois.edu/~kooper/EBI/create_met_driver.tar.gz tar zxf create_met_driver.tar.gz rm create_met_driver.tar.gz "],["docker-index.html", "25 Docker", " 25 Docker This chapter describes the PEcAn Docker container infrastructure. It contains the following sections: Introduction to Docker – Brief introduction to Docker and docker-compose Docker quickstart – Brief tutorial for setting up a Docker-based PEcAn instance PEcAn Docker Architecture – Detailed description of the containers comprising the PEcAn Docker-based infrastructure Dockerfiles for models – General guide for writing Dockerfiles for new models Building and modifying images Troubleshooting Docker Migrating from VM to Docker – Steps to migrate from running PEcAn on a VM to a docker. "],["docker-intro.html", "25.1 Introduction to Docker?", " 25.1 Introduction to Docker? What is Docker Working with Docker docker-compose 25.1.1 What is Docker? For a quick and accessible introduction to Docker, we suggest this YouTube video: Learn Docker in 12 Minutes. For more comprehensive Docker documentation, we refer you to the Docker documentation website. For a useful analogy for Docker containerization, we refer you to the webcomic xkcd. Docker is a technology for encapsulating software in “containers”, somewhat similarly to virtual machines. Like virtual machines, Docker containers facilitate software distribution by bundling the software with all of its dependencies in a single location. Unlike virtual machines, Docker containers are meant to only run a single service or process and are build on top of existing services provided by the host OS (such as disk access, networking, memory management etc.). In Docker, an image refers to a binary snapshot of a piece of software and all of its dependencies. A container refers to a running instance of a particular image. A good rule of thumb is that each container should be responsible for no more than one running process. A software stack refers to a collection of containers, each responsible for its own process, working together to power a particular application. Docker makes it easy to run multiple software stacks at the same time in parallel on the same machine. Stacks can be given a unique name, which is passed along as a prefix to all their containers. Inside these stacks, containers can communicate using generic names not prefixed with the stack name, making it easy to deploy multiple stacks with the same internal configuration. Containers within the same stack communicate with each other via a common network. Like virtual machines or system processes, Docker stacks can also be instructed to open specific ports to facilitate communication with the host and other machines. The PEcAn database BETY provides an instructive case-study. BETY is comprised of two core processes – a PostgreSQL database, and a web-based front-end to that database (Apache web server with Ruby on Rails). Running BETY as a “Dockerized” application therefore involves two containers – one for the PostgreSQL database, and one for the web server. We could build these containers ourselves by starting from a container with nothing but the essentials of a particular operating system, but we can save some time and effort by starting with an existing image for PostgreSQL from Docker Hub. When starting a Dockerized BETY, we start the PostgreSQL container first, then start the BETY container telling it how to communicate with the PostgreSQL container. To upgrade an existing BETY instance, we stop the BETY container, download the latest version, tell it to upgrade the database, and re-start the BETY container. There is no need to install new dependencies for BETY since they are all shipped as part of the container. The PEcAn Docker architecture is designed to facilitate installation and maintenance on a variety of systems by eliminating the need to install and maintain complex system dependencies (such as PostgreSQL, Apache web server, and Shiny server). Furthermore, using separate Docker containers for each ecosystem model helps avoid clashes between different software version requirements of different models (e.g. some models require GCC <5.0, while others may require GCC >=5.0). The full PEcAn Docker stack is described in more detail in the next section. 25.1.2 Working with Docker To run an image, you can use the Docker command line interface. For example, the following runs a PostgreSQL image based on the pre-existing PostGIS image by mdillon: docker run \\ --detach \\ --rm \\ --name postgresql \\ --network pecan \\ --publish 9876:5432 \\ --volume ${PWD}/postgres:/var/lib/postgresql/data \\ mdillon/postgis:9.6-alpine This will start the PostgreSQL+PostGIS container. The following options were used: --detach makes the container run in the background. --rm removes the container when it is finished (make sure to use the volume below). --name the name of the container, also the hostname of the container which can be used by other docker containers in the same network inside docker. --network pecan the network that the container should be running in, this leverages of network isolation in docker and allows this container to be connected to by others using the postgresql hostname. --publish exposes the port to the outside world, this is like ssh, and maps port 9876 to port 5432 in the docker container --volume maps a folder on your local machine to the machine in the container. This allows you to save data on your local machine. mdillon/postgis:9.6-alpine is the actual image that will be run, in this case it comes from the group/person mdillon, the container is postgis and the version 9.6-alpine (version 9.6 build on alpine linux). Other options that might be used: --tty allocate a pseudo-TTY to send stdout and stderr back to the console. --interactive keeps stdin open so the user can interact with the application running. --env sets environment variables, these are often used to change the behavior of the docker container. To see a list of all running containers you can use the following command: docker ps To see the log files of this container you use the following command (you can either use their name or id as returned by docker ps). The -f flag will follow the stdout/stderr from the container, use Ctrl-C to stop following the stdout/stderr. docker logs -f postgresql To stop a running container use: docker stop postgresql Containers that are running in the foreground (without the --detach) can be stopped by pressing Ctrl-C. Any containers running in the background (with --detach) will continue running until the machine is restarted or the container is stopped using docker stop. 25.1.3 docker-compose For a quick introduction to docker-compose, we recommend the following YouTube video: Docker Compose in 12 Minutes. The complete docker-compose references can be found on the Docker documentation website. docker-compose provides a convenient way to configure and run a multi-container Docker stack. Basically, a docker-compose setup consists of a list of containers and their configuration parameters, which are then internally converted into a bunch of docker commands. To configure BETY as described above, we can use a docker-compose.yml file like the following: version: "3" services: postgres: image: mdillon/postgis:9.5 bety: image: pecan/bety depends_on: - postgres This simple file allows us to bring up a full BETY application with both database and BETY application. The BETY app will not be brought up until the database container has started. You can now start this application by changing into the same directory as the docker-compose.yml file (cd /path/to/file) and then running: docker-compose up This will start the application, and you will see the log files for the 2 different containers. "],["docker-quickstart.html", "25.2 Quick-start docker install", " 25.2 Quick-start docker install git clone git@github.com/pecanproject/pecan cd pecan # start database docker-compose -p pecan up -d postgres # add example data (first time only) docker-compose run --rm bety initialize docker run -ti --rm --network pecan_pecan --volume pecan_pecan:/data --env FQDN=docker pecan/data:develop # start PEcAn docker-compose -p pecan up -d # run a model curl -v -X POST \\ -F 'hostname=docker' \\ -F 'modelid=1000000014' \\ -F 'sitegroupid=1' \\ -F 'siteid=772' \\ -F 'sitename=Niwot Ridge Forest/LTER NWT1 (US-NR1)' \\ -F 'pft[]=temperate.coniferous' \\ -F 'start=2004/01/01' \\ -F 'end=2004/12/31' \\ -F 'input_met=5000000005' \\ -F 'email=' \\ -F 'notes=' \\ 'http://pecan.localhost/pecan/04-runpecan.php' "],["the-pecan-docker-install-process-in-detail.html", "25.3 The PEcAn docker install process in detail", " 25.3 The PEcAn docker install process in detail 25.3.1 Configure docker-compose This section will let you download some configuration files. The documentation provides links to the latest released version (master branch in GitHub) or the develop version that we are working on (develop branch in GitHub) which will become the next release. If you cloned the PEcAn GitHub repository you can use git checkout <branch> to switch branches. The PEcAn Docker stack is configured using a docker-compose.yml file. You can download just this file directly from GitHub latest or develop. You can also find this file in the root of cloned PEcAn GitHub repository. There is no need to edit the docker-compose.yml file. You can use either the .env file to change some of the settings, or the docker-compose.override.yml file to modify the docker-compose.yml file. This makes it easier for you to get an updated version of the docker-compose.yml file and not lose any changes you have made to it. Some of the settings in the docker-compose.yml can be set using a .env file. You can download either the latest or the develop version. If you have cloned the GitHub repository it is also located in the docker folder. This file should be called .env and be placed in the same folder as your docker-compose.yml file. This file will allow you to set which version of PEcAn or BETY to use. See the comments in this file to control the settings. Option you might want to set are: PECAN_VERSION : The docker images to use for PEcAn. The default is latest which is the latest released version of PEcAn. Setting this to develop will result in using the version of PEcAn which will become the next release. PECAN_FQDN : Is the name of the server where PEcAn is running. This is what is used to register all files generated by this version of PEcAn (see also TRAEFIK_HOST). PECAN_NAME : A short name of this PEcAn server that is shown in the pull down menu and might be easier to recognize. BETY_VERSION : This controls the version of BETY. The default is latest which is the latest released version of BETY. Setting this to develop will result in using the version of BETY which will become the next release. TRAEFIK_HOST : Should be the FQDN of the server, this is needed when generating a SSL certificate. For SSL certificates you will need to set TRAEFIK_ACME_ENABLE as well as TRAEFIK_ACME_EMAIL. TRAEFIK_IPFILTER : is used to limit access to certain resources, such as RabbitMQ and the Traefik dashboard. A final file, which is optional, is a docker-compose.override.yml. You can download a version for the latest and develop versions. If you have cloned the GitHub repository it is located in the docker folder. Use this file as an example of what you can do, only copy the pieces over that you really need. This will allow you to make changes to the docker-compose file for your local installation. You can use this to add additional containers to your stack, change the path where docker stores the data for the containers, or you can use this to open up the postgresql port. version: "3" services: # expose database to localhost for ease of access postgres: ports: - 5432:5432 Once you have the docker-compose.yml file as well as the optional .env and docker-compose.override.yml in a folder you can start the PEcAn stack. The following instructions assume you are in the same directory as the file (if not, cd into it). In the rest of this section we will use a few arguments for the docker-compose application. The location of these arguments are important. The general syntax of docker-compose is docker-compose <ARGUMENTS FOR DOCKER COMPOSE> <COMMAND> <ARGUMENTS FOR COMMAND> [SERVICES]. More generally, docker-compose options are very sensitive to their location relative to other commands in the same line – that is, docker-compose -f /my/docker-compose.yml -p pecan up -d postgres is not the same as docker-compose -d postgres -p pecan up -f /my/docker-compose.yml. If expected ever don’t seem to be working, check that the arguments are in the right order.) -f <filename> : ARGUMENTS FOR DOCKER COMPOSE : Allows you to specify a docker-compose.yml file explicitly. You can use this argument multiple times. Default is to use the docker-compose.yml and docker-compose.override.yml in your current folder. -p <projectname> : ARGUMENTS FOR DOCKER COMPOSE : Project name, all volumes, networks, and containers will be prefixed with this argument. The default value is to use the current folder name. -d : ARGUMENTS FOR up COMMAND : Will start all the containers in the background and return back to the command shell. If no services as added to the docker-compose command all services possible will be started. 25.3.2 Initialize PEcAn (first time only) Before you can start to use PEcAn for the first time you will need to initialize the database (and optionally add some data). The following two sections will first initialize the database and secondly add some data to the system. 25.3.2.1 Initialize the PEcAn database The commands described in this section will set up the PEcAn database (BETY) and pre-load it with some common “default” data. docker-compose -p pecan up -d postgres # If you have a custom docker-compose file: # docker-compose -f /path/to/my-docker-compose.yml -p pecan up -d postgres The breakdown of this command is as follows: -p pecan – This tells docker-compose to do all of this as part of a “project” -p we’ll call pecan. By default, the project name is set to the name of the current working directory. The project name will be used as a prefix to all containers started by this docker-compose instance (so, if we have a service called postgres, this will create a container called pecan_postgres). up -d – up is a command that initializes the containers. Initialization involves downloading and building the target containers and any containers they depend on, and then running them. Normally, this happens in the foreground, printing logs directly to stderr/stdout (meaning you would have to interrupt it with Ctrl-C), but the -d flag forces this to happen more quietly and in the background. postgres – This indicates that we only want to initialize the service called postgres (and its dependencies). If we omitted this, docker-compose would initialize all containers in the stack. The end result of this command is to initialize a “blank” PostGIS container that will run in the background. This container is not connected to any data (yet), and is basically analogous to just installing and starting PostgreSQL to your system. As a side effect, the above command will also create blank data “volumes” and a “network” that containers will use to communicate with each other. Because our project is called pecan and docker-compose.yml describes a network called pecan, the resulting network is called pecan_pecan. This is relevant to the following commands, which will actually initialize and populate the BETY database. Assuming the above has run successfully, next run the following: docker run --rm --network pecan_pecan pecan/db The breakdown of this command is as follows: {#docker-run-init} docker run – This says we will be running a container. --rm – This automatically removes the resulting container once the specified command exits, as well as any volumes associated with the container. This is useful as a general “clean-up” flag for one-off commands (like this one) to make sure you don’t leave any “zombie” containers or volumes around at the end. --network pecan_pecan – Thsi will start the container in the same network space as the posgres container, allowing it to push data into the database. pecan/db – This is the name of the container, this holds a copy of the database used to initialize the postgresql database. Note that this command may throw a bunch of errors related to functions and/or operators already existing. This is normal – it just means that the PostGIS extension to PostgreSQL is already installed. The important thing is that you see output near the end like: ---------------------------------------------------------------------- Safety checks ---------------------------------------------------------------------- ---------------------------------------------------------------------- Making sure user 'bety' exists. If you do not see this output, you can look at the troubleshooting section at the end of this section for some troubleshooting tips, as well as some solutions to common problems. Once the command has finished successfully, proceed with the next step which will load some initial data into the database and place the data in the docker volumes. 25.3.2.2 Add first user to PEcAn database You can add an initial user to the BETY database, for example the following commands will add the guestuser account as well as the demo carya account: # guest user docker-compose run --rm bety user guestuser guestuser "Guest User" guestuser@example.com 4 4 # example user docker-compose run --rm bety user carya illinois "Carya Demo User" carya@example.com 1 1 25.3.2.3 Add example data (first time only) The following command will add some initial data to the PEcAn stack and register the data with the database. docker run -ti --rm --network pecan_pecan --volume pecan_pecan:/data --env FQDN=docker pecan/data:develop The breakdown of this command is as follows: docker run – This says we will be running a specific command inside the target Docker container. See docker run --help and the Docker run reference for more information. -ti – This is actually two flags, -t to allocate a pseudo-tty and -i to keep STDIN open even if detached. -t is necessary to ensure lower-level script commands run correctly. -i makes sure that the command output (stdin) is displayed. --rm – This automatically removes the resulting container once the specified command exits, as well as any volumes associated with the container. This is useful as a general “clean-up” flag for one-off commands (like this one) to make sure you don’t leave any “zombie” containers or volumes around at the end. --network pecan_pecan – This indicates that the container will use the existing pecan_pecan network. This network is what ensures communication between the postgres container (which, recall, is just a PostGIS installation with some data) and the “volumes” where the actual data are persistently stored. pecan/data:develop – This is the name of the image in which to run the specified command, in the form repository/image:version. This is interpreted as follows: First, it sees if there are any images called pecan/data:develop available on your local machine. If there are, it uses that one. If that image version is not available locally, it will next try to find the image online. By default, it searches Docker Hub, such that pecan/data gets expanded to the container at https://hub.docker.com/r/pecan/data. For custom repositories, a full name can be given, such as hub.ncsa.illinois.edu/pecan/data:latest. If :version is omitted, Docker assumes :latest. NOTE that while online containers should have a :latest version, not all of them do, and if a :latest version does not exist, Docker will be unable to find the image and will throw an error. Everything after the image name (here, pecan/data:develop) is interpreted as an argument to the image’s specified entrypoint. --volume pecan_pecan:/data – This mounts the data from the subsequent container (pecan/data:develop) onto the current project volume, called pecan_pecan (as with the network, the project name pecan is the prefix, and the volume name also happens to be pecan as specified in the docker-compose.yml file). --env FQDN=docker – the Fully Qualified Domain Name, this is the same value as specified in the .env file (for the web, monitor and executor containers). This will link the data files to the name in the machines table in BETY. pecan/data:develop – As above, this is the target image to run. Since there is no argument after the image name, this command will run the default command (CMD) specified for this docker container. In this case, it is the docker/add_data.sh script from the PEcAn repository. Under the hood, this container runs the docker/add-data.sh script, which copies a bunch of input files and registers them with the PEcAn database. Successful execution of this command should take some time because it involves copying reasonably large amounts of data and performing a number of database operations. 25.3.2.4 Start PEcAn If you already completed the above steps, you can start the full stack by just running the following: docker-compose -p pecan up -d This will build and start all containers required to run PEcAn. With the -d flag, this will run all of these containers quietly in the background, and show a nice architecture diagram with the name and status of each container while they are starting. Once this is done you have a working instance of PEcAn. If all of the containers started successfully, you should be able to access the various components from a browser via the following URLs (if you run these commands on a remote machine replace localhost with the actual hostname). PEcAn web interface (running models) – http://pecan.localhost/pecan/ (NOTE: The trailing backslash is necessary.) PEcAn documentation and home page – http://pecan.localhost/ BETY web interface – http://pecan.localhost/bety/ Monitor, service that monitors models and shows all models that are online as well as how many instances are online and the number of jobs waiting. The output is in JSON – http://pecan.localhost/monitor/ 25.3.2.5 Start model runs using curl To test PEcAn you can use the following curl statement, or use the webpage to submit a request (if you run these commands on a remote machine replace localhost with the actual hostname): curl -v -X POST \\ -F 'hostname=docker' \\ -F 'modelid=5000000002' \\ -F 'sitegroupid=1' \\ -F 'siteid=772' \\ -F 'sitename=Niwot Ridge Forest/LTER NWT1 (US-NR1)' \\ -F 'pft[]=temperate.coniferous' \\ -F 'start=2004/01/01' \\ -F 'end=2004/12/31' \\ -F 'input_met=5000000005' \\ -F 'email=' \\ -F 'notes=' \\ 'http://pecan.localhost/pecan/04-runpecan.php' This should return some text with in there Location: this is shows the workflow id, you can prepend http://pecan.localhost/pecan/ to the front of this, for example: http://pecan.localhost/pecan/05-running.php?workflowid=99000000001. Here you will be able to see the progress of the workflow. To see what is happening behind the scenes you can use look at the log file of the specific docker containers, once of interest are pecan_executor_1 this is the container that will execute a single workflow and pecan_sipnet_1 which executes the sipnet mode. To see the logs you use docker logs pecan_executor_1 Following is an example output: 2018-06-13 15:50:37,903 [MainThread ] INFO : pika.adapters.base_connection - Connecting to 172.18.0.2:5672 2018-06-13 15:50:37,924 [MainThread ] INFO : pika.adapters.blocking_connection - Created channel=1 2018-06-13 15:50:37,941 [MainThread ] INFO : root - [*] Waiting for messages. To exit press CTRL+C 2018-06-13 19:44:49,523 [MainThread ] INFO : root - b'{"folder": "/data/workflows/PEcAn_99000000001", "workflowid": "99000000001"}' 2018-06-13 19:44:49,524 [MainThread ] INFO : root - Starting job in /data/workflows/PEcAn_99000000001. 2018-06-13 19:45:15,555 [MainThread ] INFO : root - Finished running job. This shows that the executor connects to RabbitMQ, waits for messages. Once it picks up a message it will print the message, and execute the workflow in the folder passed in with the message. Once the workflow (including any model executions) is finished it will print Finished. The log file for pecan_sipnet_1 is very similar, in this case it runs the job.sh in the run folder. To run multiple executors in parallel you can duplicate the executor section in the docker-compose file and just rename it from executor to executor1 and executor2 for example. The same can be done for the models. To make this easier it helps to deploy the containers using Kubernetes allowing to easily scale up and down the containers. 25.3.3 Troubleshooting When initializing the database, you will know you have encountered more serious errors if the command exits or hangs with output resembling the following: LINE 1: SELECT count(*) FROM formats WHERE ... ^ Error: Relation `formats` does not exist If the above command fails, you can try to fix things interactively by first opening a shell inside the container… docker run -ti --rm --network pecan_pecan pecan/bety:latest /bin/bash …and then running the following commands, which emulate the functionality of the entrypoint.sh with the initialize argument. # Create the bety role in the postgresql database psql -h postgres -p 5432 -U postgres -c "CREATE ROLE bety WITH LOGIN CREATEDB NOSUPERUSER NOCREATEROLE PASSWORD 'bety'" # Initialize the bety database itself, and set to be owned by role bety psql -h postgres -p 5432 -U postgres -c "CREATE DATABASE bety WITH OWNER bety" # If either of these fail with a "role/database bety already exists", # that's fine. You can safely proceed to the next command. # Load the actual bety database tables and values ./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c -u -g -m ${LOCAL_SERVER} -r 0 -w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz "],["pecan-docker.html", "25.4 PEcAn Docker Architecture", " 25.4 PEcAn Docker Architecture Overview PEcAn’s docker-compose Top-level structure traefik portainer minio thredds postgres rabbitmq bety docs web executor monitor Model-specific containers 25.4.1 Overview The PEcAn docker architecture consists of many containers (see figure below) that will communicate with each other. The goal of this architecture is to easily expand the PEcAn system by deploying new model containers and registering them with PEcAn. Once this is done the user can now use these new models in their work. The PEcAn framework will setup the configurations for the models, and send a message to the model containers to start execution. Once the execution is finished the PEcAn framework will continue. This is exactly as if the model is running on a HPC machine. Models can be executed in parallel by launching multiple model containers. As can be seen in the figure the architecture leverages of two standard containers (in orange). The first container is postgresql with postgis (mdillon/postgis) which is used to store the database used by both BETY and PEcAn. The second containers is a messagebus, more specifically RabbitMQ (rabbitmq). The BETY app container (pecan/bety) is the front end to the BETY database and is connected to the postgresql container. A http server can be put in front of this container for SSL termination as well to allow for load balancing (by using multiple BETY app containers). The PEcAn framework containers consist of multiple unique ways to interact with the PEcAn system (none of these containers will have any models installed): PEcAn shiny hosts the shiny applications developed and will interact with the database to get all information necessary to display PEcAn rstudio is a rstudio environment with the PEcAn libraries preloaded. This allows for prototyping of new algorithms that can be used as part of the PEcAn framework later. PEcAn web allows the user to create a new PEcAn workflow. The workflow is stored in the database, and the models are executed by the model containers. PEcAn cli will allow the user to give a pecan.xml file that will be executed by the PEcAn framework. The workflow created from the XML file is stored in the database, and the models are executed by the model containers. The model containers contain the actual models that are executed as well as small wrappers to make them work in the PEcAn framework. The containers will run the model based on the parameters received from the message bus and convert the outputs back to the standard PEcAn output format. Once the container is finished processing a message it will immediatly get the next message and start processing it. 25.4.2 PEcAn’s docker-compose The PEcAn Docker architecture is described in full by the PEcAn docker-compose.yml file. For full docker-compose syntax, see the official documentation. This section describes the top-level structure and each of the services, which are as follows: traefik portainer minio thredds postgres rabbitmq bety docs web executor monitor Model-specific services For reference, the complete docker-compose file is as follows: version: '3.2' services: traefik: image: traefik:v2.9 command: - --log.level=INFO - --api=true - --api.dashboard=true - --entrypoints.web.address=:80 - --providers.docker=true - --providers.docker.endpoint=unix:///var/run/docker.sock - --providers.docker.exposedbydefault=false - --providers.docker.watch=true restart: unless-stopped networks: pecan security_opt: no-new-privileges:true ports: ${TRAEFIK_HTTP_PORT-80}:80 volumes: - traefik:/config - /var/run/docker.sock:/var/run/docker.sock:ro labels: - traefik.enable=true - traefik.http.routers.traefik.entrypoints=web - traefik.http.routers.traefik.rule=Host(`traefik.pecan.localhost`) - traefik.http.routers.traefik.service=api@internal rabbitmq: image: rabbitmq:3.8-management restart: unless-stopped networks: pecan environment: - RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER:-guest} - RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS:-guest} labels: - traefik.enable=true - traefik.http.services.rabbitmq.loadbalancer.server.port=15672 - traefik.http.routers.rabbitmq.entrypoints=web - traefik.http.routers.rabbitmq.rule=Host(`rabbitmq.pecan.localhost`) volumes: rabbitmq:/var/lib/rabbitmq postgres: image: mdillon/postgis:9.5 restart: unless-stopped networks: pecan volumes: postgres:/var/lib/postgresql/data bety: image: pecan/bety:${BETY_VERSION:-latest} restart: unless-stopped networks: pecan environment: - UNICORN_WORKER_PROCESSES=1 - SECRET_KEY_BASE=${BETY_SECRET_KEY:-notasecret} - RAILS_RELATIVE_URL_ROOT=/bety - LOCAL_SERVER=${BETY_LOCAL_SERVER:-99} depends_on: postgres labels: - traefik.enable=true - traefik.http.services.bety.loadbalancer.server.port=8000 - traefik.http.routers.bety.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/bety/`) rstudio: image: pecan/base:${PECAN_VERSION:-latest} command: /work/rstudio.sh restart: unless-stopped networks: pecan depends_on: - rabbitmq - postgres environment: - KEEP_ENV=RABBITMQ_URI RABBITMQ_PREFIX RABBITMQ_PORT FQDN NAME - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - RABBITMQ_PREFIX=/ - RABBITMQ_PORT=15672 - FQDN=${PECAN_FQDN:-docker} - NAME=${PECAN_NAME:-docker} - USER=${PECAN_RSTUDIO_USER:-carya} - PASSWORD=${PECAN_RSTUDIO_PASS:-illinois} - USERID=${UID:-1001} - GROUPID=${GID:-1001} volumes: - pecan:/data - rstudio:/home labels: - traefik.enable=true - traefik.http.routers.rstudio.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/rstudio/`) - traefik.http.routers.rstudio.service=rstudio - traefik.http.routers.rstudio.middlewares=rstudio-stripprefix,rstudio-headers - traefik.http.services.rstudio.loadbalancer.server.port=8787 - traefik.http.middlewares.rstudio-headers.headers.customrequestheaders.X-RStudio-Root-Path=/rstudio - traefik.http.middlewares.rstudio-stripprefix.stripprefix.prefixes=/rstudio - traefik.http.routers.rstudio-local.entrypoints=web - traefik.http.routers.rstudio-local.rule=Host(`rstudio.pecan.localhost`) - traefik.http.routers.rstudio-local.service=rstudio-local - traefik.http.services.rstudio-local.loadbalancer.server.port=8787 docs: image: pecan/docs:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan labels: - traefik.enable=true - traefik.http.services.docs.loadbalancer.server.port=80 - traefik.http.routers.docs.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/`) pecan: user: ${UID:-1001}:${GID:-1001} image: pecan/web:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - FQDN=${PECAN_FQDN:-docker} - NAME=${PECAN_NAME:-docker} - SECRET_KEY_BASE=${BETY_SECRET_KEY:-thisisnotasecret} depends_on: - postgres - rabbitmq labels: - traefik.enable=true - traefik.http.services.pecan.loadbalancer.server.port=8080 - traefik.http.routers.pecan.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/pecan/`) volumes: - pecan:/data - pecan:/var/www/html/pecan/data monitor: user: ${UID:-1001}:${GID:-1001} image: pecan/monitor:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - FQDN=${PECAN_FQDN:-docker} depends_on: rabbitmq labels: - traefik.enable=true - traefik.http.routers.monitor.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/monitor/`) - traefik.http.routers.monitor.middlewares=monitor-stripprefix - traefik.http.middlewares.monitor-stripprefix.stripprefix.prefixes=/monitor volumes: pecan:/data executor: user: ${UID:-1001}:${GID:-1001} image: pecan/executor:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - RABBITMQ_PREFIX=/ - RABBITMQ_PORT=15672 - FQDN=${PECAN_FQDN:-docker} depends_on: - postgres - rabbitmq volumes: pecan:/data basgra: user: ${UID:-1001}:${GID:-1001} image: pecan/model-basgra-basgra_n_v1.0:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data sipnet: user: ${UID:-1001}:${GID:-1001} image: pecan/model-sipnet-r136:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data ed2: user: ${UID:-1001}:${GID:-1001} image: pecan/model-ed2-2.2.0:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data maespa: user: ${UID:-1001}:${GID:-1001} image: pecan/model-maespa-git:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data biocro: user: ${UID:-1001}:${GID:-1001} image: pecan/model-biocro-0.95:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data dbsync: image: pecan/shiny-dbsync:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan depends_on: postgres labels: - traefik.enable=true - traefik.http.routers.dbsync.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/dbsync/`) - traefik.http.routers.dbsync.middlewares=dbsync-stripprefix - traefik.http.middlewares.dbsync-stripprefix.stripprefix.prefixes=/monitor api: image: pecan/api:${PECAN_VERSION:-latest} user: ${UID:-1001}:${GID:-1001} networks: pecan environment: - PGHOST=${PGHOST:-postgres} - HOST_ONLY=${HOST_ONLY:-FALSE} - AUTH_REQ=${AUTH_REQ:-FALSE} - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - DATA_DIR=${DATA_DIR:-/data/} - DBFILES_DIR=${DBFILES_DIR:-/data/dbfiles/} - SECRET_KEY_BASE=${BETY_SECRET_KEY:-thisisnotasecret} labels: - traefik.enable=true - traefik.http.routers.api.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/api/`) - traefik.http.services.api.loadbalancer.server.port=8000 depends_on: postgres volumes: pecan:/data/ networks: pecan: ~ volumes: traefik: ~ postgres: ~ rabbitmq: ~ pecan: ~ rstudio: ~ There are two ways you can override different values in the docker-compose.yml file. The first method is to create a file called .env that is placed in the same folder as the docker-compose.yml file. This file can override some of configuration variables used by docker-compose. For example the following is an example of the env file # This file will override the configuration options in the docker-compose # file. Copy this file to the same folder as docker-compose as .env # ---------------------------------------------------------------------- # GENERAL CONFIGURATION # ---------------------------------------------------------------------- # project name (-p flag for docker-compose) #COMPOSE_PROJECT_NAME=pecan # ---------------------------------------------------------------------- # TRAEFIK CONFIGURATION # ---------------------------------------------------------------------- # hostname of server #TRAEFIK_HOST=pecan-docker.ncsa.illinois.edu # Run traffik on port 80 (http) and port 443 (https) #TRAEFIK_HTTP_PORT=80 #TRAEFIK_HTTPS_PORT=443 # Use you real email address here to be notified if cert expires #TRAEFIK_ACME_EMAIL=pecanproj@gmail.com # ---------------------------------------------------------------------- # PEcAn CONFIGURATION # ---------------------------------------------------------------------- # what version of pecan to use #PECAN_VERSION=develop # the fully qualified hostname used for this server #PECAN_FQDN=pecan-docker.ncsa.illinois.edu # short name shown in the menu #PECAN_NAME=pecan-docker # ---------------------------------------------------------------------- # BETY CONFIGURATION # ---------------------------------------------------------------------- # what version of BETY to use #BETY_VERSION=develop # what is our server number, 99=vm, 98=docker #BETY_LOCAL_SERVER=98 # secret used to encrypt cookies in BETY #BETY_SECRET_KEY=1208q7493e8wfhdsohfo9ewhrfiouaho908ruq30oiewfdjspadosuf08q345uwrasdy98t7q243 # ---------------------------------------------------------------------- # MINIO CONFIGURATION # ---------------------------------------------------------------------- # minio username and password #MINIO_ACCESS_KEY=carya #MINIO_SECRET_KEY=illinois # ---------------------------------------------------------------------- # PORTAINER CONFIGURATION # ---------------------------------------------------------------------- # password for portainer admin account # use docker run --rm httpd:2.4-alpine htpasswd -nbB admin <password> | cut -d ":" -f 2 #PORTAINER_PASSWORD=$2y$05$5meDPBtS3NNxyGhBpYceVOxmFhiiC3uY5KEy2m0YRbWghhBr2EVn2 # ---------------------------------------------------------------------- # RABBITMQ CONFIGURATION # ---------------------------------------------------------------------- # RabbitMQ username and password #RABBITMQ_DEFAULT_USER=carya #RABBITMQ_DEFAULT_PASS=illinois # create the correct URI with above username and password #RABBITMQ_URI=amqp://carya:illinois@rabbitmq/%2F # ---------------------------------------------------------------------- # RSTUDIO CONFIGURATION # ---------------------------------------------------------------------- # Default RStudio username and password for startup of container #PECAN_RSTUDIO_USER=carya #PECAN_RSTUDIO_PASS=illinois You can also extend the docker-compose.yml file with a docker-compose.override.yml file (in the same directory), allowing you to add more services, or for example to change where the volumes are stored (see official documentation). For example the following will change the volume for postgres to be stored in your home directory: version: "3" volumes: postgres: driver_opts: type: none device: ${HOME}/postgres o: bind 25.4.3 Top-level structure The root of the docker-compose.yml file contains three sections: services – This is a list of services provided by the application, with each service corresponding to a container. When communicating with each other internally, the hostnames of containers correspond to their names in this section. For instance, regardless of the “project” name passed to docker-compose up, the hostname for connecting to the PostgreSQL database of any given container is always going to be postgres (e.g. you should be able to access the PostgreSQL database by calling the following from inside the container: psql -d bety -U bety -h postgres). The services comprising the PEcAn application are described below. networks – This is a list of networks used by the application. Containers can only communicate with each other (via ports and hostnames) if they are on the same Docker network, and containers on different networks can only communicate through ports exposed by the host machine. We just provide the network name (pecan) and resort to Docker’s default network configuration. Note that the services we want connected to this network include a networks: ... - pecan tag. For more details on Docker networks, see the official documentation. volumes – Similarly to networks, this just contains a list of volume names we want. Briefly, in Docker, volumes are directories containing files that are meant to be shared across containers. Each volume corresponds to a directory, which can be mounted at a specific location by different containers. For example, syntax like volumes: ... - pecan:/data in a service definition means to mount the pecan “volume” (including its contents) in the /data directory of that container. Volumes also allow data to persist on containers between restarts, as normally, any data created by a container during its execution is lost when the container is re-launched. For example, using a volume for the database allows data to be saved between different runs of the database container. Without volumes, we would start with a blank database every time we restart the containers. For more details on Docker volumes, see the official documentation. Here, we define three volumes: postgres – This contains the data files underlying the PEcAn PostgreSQL database (BETY). Notice that it is mounted by the postgres container to /var/lib/postgresql/data. This is the data that we pre-populate when we run the Docker commands to initialize the PEcAn database. Note that these are the values stored directly in the PostgreSQL database. The default files to which the database points (i.e. dbfiles) are stored in the pecan volume, described below. rabbitmq – This volume contains persistent data for RabbitMQ. It is only used by the rabbitmq service. pecan – This volume contains PEcAn’s dbfiles, which include downloaded and converted model inputs, processed configuration files, and outputs. It is used by almost all of the services in the PEcAn stack, and is typically mounted to /data. 25.4.4 traefik Traefik manages communication among the different PEcAn services and between PEcAn and the web. Among other things, traefik facilitates the setup of web access to each PEcAn service via common and easy-to-remember URLs. For instance, the following lines in the web service configure access to the PEcAn web interface via the URL http://pecan.localhost/pecan/ : ~ (Further details in the works…) The traefik service configuration looks like this: traefik: image: traefik:v2.9 command: - --log.level=INFO - --api=true - --api.dashboard=true - --entrypoints.web.address=:80 - --providers.docker=true - --providers.docker.endpoint=unix:///var/run/docker.sock - --providers.docker.exposedbydefault=false - --providers.docker.watch=true restart: unless-stopped networks: pecan security_opt: no-new-privileges:true ports: ${TRAEFIK_HTTP_PORT-80}:80 volumes: - traefik:/config - /var/run/docker.sock:/var/run/docker.sock:ro labels: - traefik.enable=true - traefik.http.routers.traefik.entrypoints=web - traefik.http.routers.traefik.rule=Host(`traefik.pecan.localhost`) - traefik.http.routers.traefik.service=api@internal 25.4.5 portainer portainer is lightweight management UI that allows you to manage the docker host (or swarm). You can use this service to monitor the different containers, see the logfiles, and start and stop containers. The portainer service configuration looks like this: NA: ~ Portainer is accessible by browsing to pecan.localhost/portainer/. You can either set the password in the .env file (for an example see env.example) or you can use the web browser and go to the portainer url. If this is the first time it will ask for your password. 25.4.6 minio Minio is a service that provides access to the a folder on disk through a variety of protocols, including S3 buckets and web-based access. We mainly use Minio to facilitate access to PEcAn data using a web browser without the need for CLI tools. Our current configuration is as follows: NA: ~ The Minio interface is accessible by browsing to minio.pecan.localhost. From there, you can browse directories and download files. You can also upload files by clicking the red “+” in the bottom-right corner. Note that it is currently impossible to create or upload directories using the Minio interface (except in the /data root directory – those folders are called “buckets” in Minio). Therefore, the recommended way to perform any file management tasks other than individual file uploads is through the command line, e.g. docker run -it --rm --volumes pecan_pecan:/data --volumes /path/to/local/directory:/localdir ubuntu # Now, you can move files between `/data` and `/localdir`, create new directories, etc. 25.4.7 thredds This service allows PEcAn model outputs to be accessible via the THREDDS data server (TDS). When the PEcAn stack is running, the catalog can be explored in a web browser at http://pecan.localhost/thredds/catalog.html. Specific output files can also be accessed from the command line via commands like the following: Note that everything after outputs/ exactly matches the directory structure of the workflows directory. Which files are served, which subsetting services are available, and other aspects of the data server’s behavior are configured in the docker/thredds_catalog.xml file. Specifically, this XML tells the data server to use the datasetScan tool to serve all files within the /data/workflows directory, with the additional filter that only files ending in .nc are served. For additional information about the syntax of this file, see the extensive THREDDS documentation. Our current configuration is as follows: NA: ~ 25.4.8 postgres This service provides a working PostGIS database. Our configuration is fairly straightforward: postgres: image: mdillon/postgis:9.5 restart: unless-stopped networks: pecan volumes: postgres:/var/lib/postgresql/data Some additional details about our configuration: image – This pulls a container with PostgreSQL + PostGIS pre-installed. Note that by default, we use PostgreSQL version 9.5. To experiment with other versions, you can change 9.5 accordingly. networks – This allows PostgreSQL to communicate with other containers on the pecan network. As mentioned above, the hostname of this service is just its name, i.e. postgres, so to connect to the database from inside a running container, use a command like the following: psql -d bety -U bety -h postgres volumes – Note that the PostgreSQL data files (which store the values in the SQL database) are stored on a volume called postgres (which is not the same as the postgres service, even though they share the same name). 25.4.9 rabbitmq RabbitMQ is a message broker service. In PEcAn, RabbitMQ functions as a task manager and scheduler, coordinating the execution of different tasks (such as running models and analyzing results) associated with the PEcAn workflow. Our configuration is as follows: rabbitmq: image: rabbitmq:3.8-management restart: unless-stopped networks: pecan environment: - RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER:-guest} - RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS:-guest} labels: - traefik.enable=true - traefik.http.services.rabbitmq.loadbalancer.server.port=15672 - traefik.http.routers.rabbitmq.entrypoints=web - traefik.http.routers.rabbitmq.rule=Host(`rabbitmq.pecan.localhost`) volumes: rabbitmq:/var/lib/rabbitmq Note that the traefik.http.routers.rabbitmq.rule indicates that browsing to http://rabbitmq.pecan.localhost/ leads to the RabbitMQ management console. By default, the RabbitMQ management console has username/password guest/guest, which is highly insecure. For production instances of PEcAn, we highly recommend changing these credentials to something more secure, and removing access to the RabbitMQ management console via Traefik. 25.4.10 bety This service operates the BETY web interface, which is effectively a web-based front-end to the PostgreSQL database. Unlike the postgres service, which contains all the data needed to run PEcAn models, this service is not essential to the PEcAn workflow. However, note that certain features of the PEcAn web interface do link to the BETY web interface and will not work if this container is not running. Our configuration is as follows: bety: image: pecan/bety:${BETY_VERSION:-latest} restart: unless-stopped networks: pecan environment: - UNICORN_WORKER_PROCESSES=1 - SECRET_KEY_BASE=${BETY_SECRET_KEY:-notasecret} - RAILS_RELATIVE_URL_ROOT=/bety - LOCAL_SERVER=${BETY_LOCAL_SERVER:-99} depends_on: postgres labels: - traefik.enable=true - traefik.http.services.bety.loadbalancer.server.port=8000 - traefik.http.routers.bety.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/bety/`) The BETY container Dockerfile is located in the root directory of the BETY GitHub repository (direct link). 25.4.11 docs This service will show the documentation for the version of PEcAn running as well as a homepage with links to all relevant endpoints. You can access this at http://pecan.localhost/. You can find the documentation for PEcAn at http://pecan.localhost/docs/pecan/. Our current configuration is as follows: docs: image: pecan/docs:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan labels: - traefik.enable=true - traefik.http.services.docs.loadbalancer.server.port=80 - traefik.http.routers.docs.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/`) 25.4.12 web This service runs the PEcAn web interface. It is effectively a thin wrapper around a standard Apache web server container from Docker Hub that installs some additional dependencies and copies over the necessary files from the PEcAn source code. Our configuration is as follows: NA: ~ Its Dockerfile ships with the PEcAn source code, in docker/web/Dockerfile. In terms of actively developing PEcAn using Docker, this is the service to modify when making changes to the web interface (i.e. PHP, HTML, and JavaScript code located in the PEcAn web directory). 25.4.13 executor This service is in charge of running the R code underlying the core PEcAn workflow. However, it is not in charge of executing the models themselves – model binaries are located on their own dedicated Docker containers, and model execution is coordinated by RabbitMQ. Our configuration is as follows: executor: user: ${UID:-1001}:${GID:-1001} image: pecan/executor:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - RABBITMQ_PREFIX=/ - RABBITMQ_PORT=15672 - FQDN=${PECAN_FQDN:-docker} depends_on: - postgres - rabbitmq volumes: pecan:/data Its Dockerfile is ships with the PEcAn source code, in docker/executor/Dockerfile. Its image is built on top of the pecan/base image (docker/base/Dockerfile), which contains the actual PEcAn source. To facilitate caching, the pecan/base image is itself built on top of the pecan/depends image (docker/depends/Dockerfile), a large image that contains an R installation and PEcAn’s many system and R package dependencies (which usually take ~30 minutes or longer to install from scratch). In terms of actively developing PEcAn using Docker, this is the service to modify when making changes to the PEcAn R source code. Note that, unlike changes to the web image’s PHP code, changes to the R source code do not immediately propagate to the PEcAn container; instead, you have to re-compile the code by running make inside the container. 25.4.14 monitor This service will show all models that are currently running http://pecan.localhost/monitor/. This list returned is JSON and shows all models (grouped by type and version) that are currently running, or where seen in the past. This list will also contain a list of all current active containers, as well as how many jobs are waiting to be processed. This service is also responsible for registering any new models with PEcAn so users can select it and execute the model from the web interface. Our current configuration is as follows: monitor: user: ${UID:-1001}:${GID:-1001} image: pecan/monitor:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - FQDN=${PECAN_FQDN:-docker} depends_on: rabbitmq labels: - traefik.enable=true - traefik.http.routers.monitor.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/monitor/`) - traefik.http.routers.monitor.middlewares=monitor-stripprefix - traefik.http.middlewares.monitor-stripprefix.stripprefix.prefixes=/monitor volumes: pecan:/data 25.4.15 Model-specific containers Additional models are added as additional services. In general, their configuration should be similar to the following configuration for SIPNET, which ships with PEcAn: sipnet: user: ${UID:-1001}:${GID:-1001} image: pecan/model-sipnet-r136:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data The PEcAn source contains Dockerfiles for ED2 (models/ed/Dockerfile) and SIPNET (models/sipnet/Dockerfile) that can serve as references. For additional tips on constructing a Dockerfile for your model, see Dockerfiles for Models. "],["model-docker.html", "25.5 Models using Docker", " 25.5 Models using Docker This section will discuss how to add new models to PEcAn docker. To be able to add a new model to PEcAn when using docker is as simple as starting a new container. The model will come online and let the PEcAn framework know there is a new model available, there is no need to go through the process of registering this model with PEcAn. Users will be able to select this new model from web interface and run with this model selected. For this process to work a docker image of the model will need to be created as well as small json file that is used to announce this new model. A separate service in PEcAn (monitor) will use this json file to keep track of all models available as well as register these models with PEcAn. Model information Model build Common problems 25.5.1 Model information Each model will have a small json file called model_info.json that is used to describe the model and used by the monitor service to register the model with PEcAn. This file will contain information about the model that is send as part of the heartbeat of the container to the monitor service. Below is an example of this file for the ED model. The required fields are name, type, version and binary. The fields type and version are used by PEcAn to send the messages to RabbitMQ. There is no need to specify the queue name explicitly. The queue name will be created by combining these two fields with an underscore. The field binary is used to point to the actual binary in the docker container. There are 2 special values that can be used, @VERSION@ which will be replaced by the version that is passed in when building the container, and @BINARY@ which will be replaced by the binary when building the docker image. { "name": "ED2.2", "type": "ED2", "version": "@VERSION@", "binary": "@BINARY@", "description": "The Ecosystem Demography Biosphere Model (ED2) is an integrated terrestrial biosphere model incorporating hydrology, land-surface biophysics, vegetation dynamics, and soil carbon and nitrogen biogeochemistry", "author": "Mike Dietze", "contributors": ["David LeBauer", "Xiaohui Feng", "Dan Wang", "Carl Davidson", "Rob Kooper", "Shawn Serbin", "Alexey Shiklomanov"], "links": { "source": "https://github.com/EDmodel/ED2", "issues": "https://github.com/EDmodel/ED2/issues" }, "inputs": {}, "citation": ["Medvigy D., Wofsy S. C., Munger J. W., Hollinger D. Y., Moorcroft P. R. 2009. Mechanistic scaling of ecosystem function and dynamics in space and time: Ecosystem Demography model version 2. J. Geophys. Res. 114 (doi:10.1029/2008JG000812)"] } Other fields that are recommended, but currently not used yet, are: description : a longer description of the model. creator : contact person about this docker image. contribuor : other people that have contributed to this docker image. links : addtional links to help people when using this docker image, for example values that can be used are source to link to the source code, issues to link to issue tracking system, and documentation to link to model specific documentation. citation : how the model should be cited in publications. 25.5.2 Model build In general we try to minimize the size of the images. To be able to do this we split the process of creating the building of the model images into two pieces (or leverage of an image that exists from the original model developers). If you look at the example Dockerfile you will see that there are 2 sections, the first section will build the model binary, the second section will build the actual PEcAn model, which copies the binary from the first section. This is an example of how the ED2 model is build. This will install all the packages needed to build ED2 model, gets the latest version from GitHub and builds the model. The second section will create the actual model runner. This will leverage the PEcAn model image that has PEcAn already installed as well as the python code to listen for messages and run the actual model code. This will install some additional packages needed by the model binary (more about that below), copy the model_info.json file and change the @VERSION@ and @BINARY@ placeholders. It is important values for type and version are set correct. The PEcAn code will use these to register the model with the BETY database, which is then used by PEcAn to send out a message to a specfic worker queue, if you do not set these variables correctly your model executor will pick up messages for the wrong model. To build the docker image, we use a Dockerfile (see example below) and run the following command. This command will expect the Dockerfile to live in the model specific folder and the command is executed in the root pecan folder. It will copy the content of the pecan folder and make it available to the build process (in this example we do not need any additional files). Since we can have multiple different versions of a model be available for PEcAn we ar using the following naming schema pecan/model-<modeltype>-<version>:<pecan version. For example the image below will be named pecan/model-ed2-git, since we do not specify the exact version it will be atomically be named pecan/model-ed2-git:latest. docker build \\ --tag pecan/model-ed2-git \\ --file models/ed/Dockerfile \\ . Example of a Dockerfile, in this case to build the ED2 model. # ---------------------------------------------------------------------- # BUILD MODEL BINARY # ---------------------------------------------------------------------- FROM debian:stretch as model-binary # Some variables that can be used to set control the docker build ARG MODEL_VERSION=git # install dependencies RUN apt-get update \\ && apt-get install -y --no-install-recommends \\ build-essential \\ curl \\ gfortran \\ git \\ libhdf5-dev \\ libopenmpi-dev \\ && rm -rf /var/lib/apt/lists/* # download, unzip and build ed2 WORKDIR /src RUN git -c http.sslVerify=false clone https://github.com/EDmodel/ED2.git \\ && cd ED2/ED/build \\ && curl -o make/include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.Linux \\ && if [ "${MODEL_VERSION}" != "git" ]; then git checkout ${MODEL_VERSION}; fi \\ && ./install.sh -g -p VM ######################################################################## # ---------------------------------------------------------------------- # BUILD PECAN FOR MODEL # ---------------------------------------------------------------------- FROM pecan/models:latest # ---------------------------------------------------------------------- # INSTALL MODEL SPECIFIC PIECES # ---------------------------------------------------------------------- RUN apt-get update \\ && apt-get install -y --no-install-recommends \\ libgfortran3 \\ libopenmpi2 \\ && rm -rf /var/lib/apt/lists/* # ---------------------------------------------------------------------- # SETUP FOR SPECIFIC MODEL # ---------------------------------------------------------------------- # Some variables that can be used to set control the docker build ARG MODEL_VERSION=git # Setup model_info file COPY models/ed/model_info.json /work/model.json RUN sed -i -e "s/@VERSION@/${MODEL_VERSION}/g" \\ -e "s#@BINARY@#/usr/local/bin/ed2.${MODEL_VERSION}#g" /work/model.json # COPY model binary COPY --from=model-binary /src/ED2/ED/build/ed_2.1-opt /usr/local/bin/ed2.${MODEL_VERSION} WARNING: Dockerfile environment variables set via ENV are assigned all at once; they do not evaluate successively, left to right. Consider the following block: # Don't do this! ENV MODEL_TYPE="SIPNET" \\ MODEL_VERSION=136 \\ MODEL_TYPE_VERSION=${MODEL_TYPE}_${MODEL_VERSION} # <- Doesn't know about MODEL_TYPE or MODEL_VERSION! In this block, the expansion for setting MODEL_TYPE_VERSION is not aware of the current values of MODEL_TYPE or MODEL_VERSION, and will therefore be set incorrectly to just _ (unless they have been set previously, in which case it will be aware only of their earlier values). As such, variables depending on other variables must be set in a separate, subsequent ENV statement than the variables they depend on. Once the model has build and is working we can add it to the PEcAn stack and be able to use this model in the web interface. There are two methods to start this new model. First, we can add it to the docker-compose.yml file and start the container using docker-compose -p pecan -d up. sipnet: image: pecan/model-ed2-git networks: - pecan volumes: - pecan:/data depends_on: - rabbitmq restart: unless-stopped Alternatively we can start the container manually using the following command. docker run \\ --detach \\ --rm \\ --name pecan-ed2-git \\ --networks pecan_pecan \\ --volume pecan_pecan:/data pecan/model-ed2-git 25.5.3 Common problems Following are some solutions for common problems that you might encounter when building the docker images for a model. 25.5.4 Debugging missing libraries When building the model binary it might require specific libraries to be installed. In the second stage the model binary is copied into a new image, which could result in the binary missing specific libraries. In the case of the ED2 model the following was used to find the libraries that are needed to be installed (libgfortran5 and libopenmpi3). The first step is to build the model using the Dockerfile (in this case the ap-get install was missing in the second stage). Step 5/9 : RUN git clone https://github.com/EDmodel/ED2.git && cd ED2/ED/build && curl -o make/include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.`uname -s` && if [ "${MODEL_VERSION}" != "git" ]; then git checkout ${MODEL_VERSION}; fi && ./install.sh -g -p VM ... LOTS OF OUTPUT ... make[1]: Leaving directory '/src/ED2/ED/build/bin-opt-E' Installation Complete. Removing intermediate container a53eba9a8fc1 ---> 7f23c6302130 Step 6/9 : FROM pecan/executor:latest ---> f19d81b739f5 ... MORE OUTPUT ... Step 9/9 : COPY --from=model-binary /src/ED2/ED/build/ed_2.1-opt /usr/local/bin/ed2.${MODEL_VERSION} ---> 07ac841be457 Successfully built 07ac841be457 Successfully tagged pecan/pecan-ed2:latest At this point we have created a docker image with the binary and all PEcAn code that is needed to run the model. Some models (especially those build as native code) might be missing additional packages that need to be installed in the docker image. To see if all libraries are installed for the binary. > docker run -ti --rm pecan/pecan-ed2 /bin/bash root@8a95ee8b6b47:/work# ldd /usr/local/bin/ed2.git | grep "not found" libmpi_usempif08.so.40 => not found libmpi_usempi_ignore_tkr.so.40 => not found libmpi_mpifh.so.40 => not found libmpi.so.40 => not found libgfortran.so.5 => not found Start the build container again (this is the number before the line FROM pecan/executor:latest, 7f23c6302130 in the example), and find the missing libraries listed above (for example libmpi_usempif08.so.40): > docker run --rm -ti 7f23c6302130 root@e716c63c031f:/src# dpkg -S libmpi_usempif08.so.40 libopenmpi3:amd64: /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi_usempif08.so.40.10.1 libopenmpi3:amd64: /usr/lib/x86_64-linux-gnu/libmpi_usempif08.so.40.10.1 libopenmpi3:amd64: /usr/lib/x86_64-linux-gnu/libmpi_usempif08.so.40 This shows the pages is libopenmpi3 that needs to be installed, do this for all missing packages, modify the Dockerfile and rebuild. Next time you run the ldd command there should be no more packages being listed. "],["docker-build-images.html", "25.6 Building and modifying images", " 25.6 Building and modifying images The only other section on this page is: Local development and testing with Docker For general use, it is sufficient to use the pre-built PEcAn images hosted on Docker Hub (see Docker quickstart). However, there are cases where it makes sense to re-build the Docker images locally. The following is a list of PEcAn-specific images and reasons why you would want to rebuild them locally: pecan/depends – Rebuild if: You modify the docker/depends/Dockerfile You introduce new system dependencies (i.e. things that need to be installed with apt-get) You introduce new R package dependencies, and you want those R package installations to be cached during future builds. For packages with fast build times, it may be fine to let them be installed as part of PEcAn’s standard build process (i.e. make). pecan/base – Rebuild if: You built a new version of pecan/depends (on which pecan/base depends) You modify the docker/base/Dockerfile You made changes to the PEcAn R package source code, the Makefile, or web/workflow.R. NOTE that changes to the web interface code affect pecan/web, not pecan/base pecan/executor – Rebuild if: You built a new version of pecan/base (on which pecan/executor depends) and/or, pecan/depends (on which pecan/base depends) You modified the docker/executor/Dockerfile You modified the RabbitMQ Python script (e.g. docker/receiver.py) pecan/web – Rebuild if you modified any of the following: docker/web/Dockerfile The PHP/HTML/JavaScript code for the PEcAn web interface in web/ (except web/workflow.R – that goes in pecan/base) docker/config.docker.php (the config.php file for Docker web instances) documentation/index_vm.html (the documentation HTML website) NOTE: Because changes to this code are applied instantly (i.e. do not require compilation or installation), a more effective way to do local development may be to mount the web/ or other relevant folders as a volume onto the pecan/web container. The easiest way to quickly re-build all of the images is using the docker.sh script in the PEcAn source code root directory. This script will build all of the docker images locally on your machine, and tag them as latest. This will not build the pecan/depends image by default because that takes considerably longer. However, you can force the script to build pecan/depends as well by setting the DEPEND environment variable to 1 (i.e. DEPEND=1 ./docker.sh). The following instructions provide details on how to build each image individually. To build an image locally, use the docker build command as described below. For more details, see docker build --help or the online Docker build documentation. First, in a terminal window, navigate (cd) into the PEcAn source code root directory. From there, the general syntax for building an image looks like the following: docker build -t pecan/<image name>:<image version> -f docker/base/Dockerfile.<image name> . For instance, to build a local version of the pecan/depends:latest image, you would run: docker build -t pecan/depends:latest -f docker/depends/Dockerfile . The breakdown of this command is as follows: docker build – This is the core command. The standard syntax is docker build [OPTIONS] <PATH>, where <PATH> refers to the directory to be used as the “build context”. The “build context” is the working directory assumed by the Dockerfiles. In PEcAn, this is always the PEcAn source code root directory, which allows Dockerfiles to use instructions such as COPY web/workflow.R /work/. In this example, the <PATH> is set to the current working directory, i.e. . because we are already in the PEcAn root directory. If you were located in a different directory, you would have to provide a path to the PEcAn source code root directory. Also, by default, docker build will look for a Dockerfile located at <PATH>/Dockerfile, but this is modified by the -f option described below. -t pecan/depends:latest – The -t/--tag option specifies how the image will be labeled. By default, Docker only defines unique image IDs, which are hexidecimal strings that are unintuitive and hard to remember. Tags are useful for referring to specific images in a human-readable way. Note that the same unique image can have multiple tags associated with it, so it is possible for, e.g. pecan/depends:latest, pecan/depends:custom, and even mypecan/somethingelse:20.0 to refer to the same exact image. To see a table of all local images, including their tags and IDs, run docker image ls. NOTE: PEcAn’s docker-compose.yml can be configured via the PECAN environment variable to point at different versions of PEcAn images. By default, it points to the :latest versions of all images. However, if you wanted to, for instance, build :local images corresponding to your local source code and then run that version of PEcAn, you would run: PECAN=local docker-compose -p pecan up -d This is an effective way to do local development and testing of different PEcAn versions, as described below. -f docker/depends/Dockerfile – The -f/--file tag is used to provide an alternative location and file name for the Dockerfile. 25.6.1 Local development and testing with Docker The following is an example of one possible workflow for developing and testing PEcAn using local Docker images. The basic idea is to mount a local version of the PEcAn source code onto a running pecan/executor image, and then send a special “rebuild” RabbitMQ message to the container to trigger the rebuild whenever you make changes. NOTE: All commands assume you are working from the PEcAn source code root directory. In the PEcAn source code directory, create a docker-compose.override.yml file with the following contents.: version: "3" services: executor: volumes: - .:/pecan This will mount the current directory . to the /pecan directory in the executor container. The special docker-compose.override.yml file is read automatically by docker-compose and overrides or extends any instructions set in the original docker-compose.yml file. It provides a convenient way to host server-specific configurations without having to modify the project-wide (and version-controlled) default configuration. For more details, see the Docker Compose documentation. Update your PEcAn Docker stack with docker-compose up -d. If the stack is already running, this should only restart your executor instance while leaving the remaining containers running. To update to the latest local code, run ./scripts/docker_rebuild.sh. Under the hood, this uses curl to post a RabbitMQ message to a running Docker instance. By default, the scripts assumes that username and password are both guest and that the RabbitMQ URL is http://rabbitmq.pecan.localhost/. All of these can be customized by setting the environment variables RABBITMQ_USER, RABBITMQ_PASSWORD, and RABBITMQ_URL, respectively (or running the script prefixed with those variables, e.g. RABBITMQ_USER=carya RABBITMQ_PASSWORD=illinois ./scripts/docker_rebuild.sh). This step can be repeated whenever you want to trigger a rebuild of the local code. NOTE: The updates with this workflow are specific to the running container session; restarting the executor container will revert to the previous versions of the installed packages. To make persistent changes, you should re-build the pecan/base and pecan/executor containers against the current version of the source code. NOTE: The mounted PEcAn source code directory includes everything in your local source directory, including installation artifacts used by make. This can lead to two common issues: - Any previous make cache files (stuff in the .install, .docs, etc. directories) persist across container instances, even though the installed packages may not. To ensure a complete build, it’s a good idea to run make clean on the host machine to remove these artifacts. - Similarly, any installation artifacts from local builds will be carried over to the build. In particular, be wary of packages with compiled code, such as modules/rtm (PEcAnRTM) – the compiled .o, .so, .mod, etc. files from compilation of such packages will carry over into the build, which can cause conflicts if the package was also built locally. The docker-compose.override.yml is useful for some other local modifications. For instance, the following adds a custom ED2 “develop” model container. services: # ... ed2devel: image: pecan/model-ed2-develop:latest build: context: ../ED2 # Or wherever ED2 source code is found networks: - pecan depends_on: - rabbitmq volumes: - pecan:/data restart: unless-stopped Similarly, this snippet modifies the pecan network to use a custom IP subnet mask. This is required on the PNNL cluster because its servers’ IP addresses often clash with Docker’s default IP mask. networks: pecan: ipam: config: - subnet: 10.17.1.0/24 "],["docker-troubleshooting.html", "25.7 Troubleshooting Docker", " 25.7 Troubleshooting Docker 25.7.1 “Package not available” while building images PROBLEM: Packages fail to install while building pecan/depends and/or pecan/base with an error like the following: Installing package into ‘/usr/local/lib/R/site-library’ (as ‘lib’ is unspecified) Warning: unable to access index for repository <URL>: cannot open URL '<URL>' Warning message: package ‘<PACKAGE>’ is not available (for R version 3.5.1) CAUSE: This can sometimes happen if there are problems with the RStudio Package manager, which is the default repository for the rocker/tidyverse containers. See GitHub issues rocker-org/rocker-versioned#102 and #58. WORKAROUND: Add the following line to the depends and/or base Dockerfiles before (i.e. above) any commands that install R packages (e.g. Rscript -e \"install.packages(...)\"): RUN echo "options(repos = c(CRAN = 'https://cran.rstudio.org'))" >> /usr/local/lib/R/etc/Rprofile.site This will set the default repository to the more reliable (albeit, more up-to-date; beware of breaking package changes!) RStudio CRAN mirror. Then, build the image as usual. "],["docker-migrate.html", "25.8 Migrating PEcAn from VM to Docker", " 25.8 Migrating PEcAn from VM to Docker This document assumes you have read through the Introduction to Docker as well as Docker quickstart and have docker running on the VM. This document will slowly replace each of the components with the appropriate docker images. At then end of this document you should be able to use the docker-compose command to bring up the full docker stack as if you had started with this origianally. 25.8.1 Running BETY as a docker container This will replace the BETY application running on the machine with a docker image. This will assume you still have the database running on the local machine and the only thing we replace is the BETY application. If you are running systemd (Ubuntu 16.04 or Centos 7) you can copy the following file to /etc/systemd/system/bety.service (replace LOCAL_SERVER=99 with your actual server). If you have postgres running on another server replace 127.0.0.1 with the actual ip address of the postgres server. [Unit] Description=BETY container After=docker.service [Service] Restart=always ExecStart=/usr/bin/docker run -t --rm --name bety --add-host=postgres:127.0.0.1 --network=host --env RAILS_RELATIVE_URL_ROOT=/bety --env LOCAL_SERVER=99 pecan/bety ExecStop=/usr/bin/docker stop -t 2 bety [Install] WantedBy=local.target At this point we can enable the bety service (this only needs to be done once). First we need to tell systemd a new service is available using systemctl daemon-reload. Next we enable the BETY service so it will restart automatically when the machine reboots, using systemctl enable bety. Finally we can start the BETY service using systemctl start bety. At this point BETY is running as a docker container on port 8000. You can see the log messages using journalctl -u bety. Next we need to modify apache configuration files. The file /etc/apache2/conf-enabled/bety.conf will be replaced with the following content: ProxyPass /bety/ http://pecan.localhost/bety/ ProxyPassReverse /bety/ http://pecan.localhost/bety/ RedirectMatch permanent ^/bety$ /bety/ Once this modified we can restart apache using systemctl restart apache2. At this point BETY is running in a container and is accessable trough the webserver at http://server/bety/. To upgrade to a new version of BETY you can now use the docker commands. You can use the following commands to stop BETY, pull the latest image down, migrate the database (you made a backup correct?) and start BETY again. systemctl stop bety docker pull pecan/bety:latest docker run -ti --rm --add-host=postgres:127.0.0.1 --network=host --env LOCAL_SERVER=99 pecan/bety migrate systemctl start bety Once you are satisfied with the migration of BETY you can remove the bety folder as well as any ruby binaries you have installed. "],["pecan-api.html", "25.9 The PEcAn Docker API", " 25.9 The PEcAn Docker API If you have a running instance of Dockerized PEcAn (or other setup where PEcAn workflows are submitted via RabbitMQ), you have the option of running and managing PEcAn workflows using the pecanapi package. For more details, see the pecanapi package vignette and function-level documentation. What follows is a lightning introduction. 25.9.0.1 Installation The package can be installed directly from GitHub via devtools::install_github: devtools::install_github("pecanproject/pecan/api@develop") 25.9.0.2 Creating and submitting a workflow With pecanapi, creating a workflow, submitting it to RabbitMQ, monitoring its progress, and processing its output can all be accomplished via an R script. Start by loading the package (and the magrittr package, for the %>% pipe operator). library(pecanapi) library(magrittr) Set your PEcAn database user ID, and create a database connection object, which will be used for database operations throughout the workflow. options(pecanapi.user_id = 99000000002) con <- DBI::dbConnect( drv = RPostgres::Postgres(), user = "bety", password = "bety", host = "localhost", port = 5432 ) Find model and site IDs for the site and model you want to run. model_id <- get_model_id(con, "SIPNET", "136") all_umbs <- search_sites(con, "umbs%disturbance") site_id <- subset(all_umbs, !is.na(mat))[["id"]] Insert a new workflow into the PEcAn database, and extract its ID. workflow <- insert_new_workflow(con, site_id, model_id, start_date = "2004-01-01", end_date = "2004-12-31") workflow_id <- workflow[["id"]] Pull all of this information together into a settings list object. settings <- list() %>% add_workflow(workflow) %>% add_database() %>% add_pft("temperate.deciduous") %>% add_rabbitmq(con = con) %>% modifyList(list( meta.analysis = list(iter = 3000, random.effects = list(on = FALSE, use_ghs = TRUE)), run = list(inputs = list(met = list(source = "CRUNCEP", output = "SIPNET", method = "ncss"))), ensemble = list(size = 1, variable = "NPP") )) Submit the workflow via RabbitMQ, and monitor its progress in the R process. submit_workflow(settings) watch_workflow(workflow_id) Use THREDDS to access and analyze the output. sipnet_out <- ncdf4::nc_open(run_dap(workflow_id, "2004.nc")) gpp <- ncdf4::ncvar_get(sipnet_out, "GPP") time <- ncdf4::ncvar_get(sipnet_out, "time") ncdf4::nc_close(sipnet_out) plot(time, gpp, type = "l") "],["rabbitmq.html", "25.10 RabbitMQ", " 25.10 RabbitMQ This section provides additional details about how PEcAn uses RabbitMQ to manage communication between its Docker containers. In PEcAn, we use the Python pika client to retrieve messages from RabbitMQ. The PEcAn.remote library has convenience functions that wrap the API to post and read messages. The executor and models use the python version of the RabbitMQ scripts to retrieve messages, and launch the appropriate code. 25.10.1 Producer – PEcAn.remote::rabbitmq_post_message The PEcAn.remote::rabbitmq_post_message function allows you to post messages to RabbitMQ from R. In the RabbitMQ documentation, it is known as a “producer”. It takes the body of the message to be posted and will return any output generated when the message is posted to the approriate queue. The function has three required arguments and two optional ones: uri – This is the URI used to connect to RabbitMQ, it will have the form amqp://username:password\\@server:5672/vhost. Most containers will have this injected using the environment variable RABBITMQ_URI. queue – The queue to post the message on, this is either pecan for a workflow to be exected, the name of the model and version to be executed, for example SIPNET_r136. message – The actual message to be send, this is of type list and will be converted to a json string representation. prefix – The code will talk to the rest api, this is the prefix of the rest api. In the case of the default docker-compse file, this will be /rabbitmq and will be injected as RABBITMQ_PREFIX. port – The code will talk to the rest api, this is the port of the rest api. In the case of the default docker-compse file, this will be 15672 and will be injected as RABBITMQ_PORT. The PEcAn.remote::start_rabbitmq function is a wrapper for this function that provides an easy way to post a folder message to RabbitMQ. 25.10.2 Consumer – receiver.py The receiver.py script runs like a daemon, constantly listening for messages. In the RabbitMQ documentation, it is known as a “consumer”. In PEcAn, you can tell that it is ready to receive messages if the corresponding logs (e.g. docker-compose logs executor) show the following message: [*] Waiting for messages. To exit press CTRL+C. Our reciever is configured by three environment variables: RABBITMQ_URI – This defines the URI where RabbitMQ is running. See corresponding argument in the producer RABBITMQ_QUEUE – This is the name of the queue on which the consumer will listen for messages, just as in the producer. APPLICATION – This specifies the name (including the path) of the default executable to run when receiving a message. At the moment, it should be an executable that runs in the directory specified by the message’s folder variable. In the case of PEcAn models, this is usually ./job.sh, such that the folder corresponds to the run directory associated with a particular runID (i.e. where the job.sh is located). For the PEcAn workflow itself, this is set to R CMD BATCH workflow.R, such that the folder is the root directory of the workflow (in the executor Docker container, something like /data/workflows/PEcAn_<workflowID>). This default executable is overridden if the message contains a custom_application key. If included, the string specified by the custom_application key will be run as a command exactly as is on the container, from the directory specified by folder. For instance, in the example below, the container will print “Hello there!” instead of running its default application. {"custom_application": "echo 'Hello there!'", "folder": "/path/to/my/dir"} NOTE that in RabbitMQ messages, the folder key is always required. 25.10.3 RabbitMQ and the PEcAn web interface RabbitMQ is configured by the following variables in config.php: $rabbitmq_host – The RabbitMQ server hostname (default: rabbitmq, because that is the name of the rabbitmq service in docker-compose.yml) $rabbitmq_port – The port on which RabbitMQ listens for messages (default: 5672) $rabbitmq_vhost – The path of the RabbitMQ Virtual Host (default: /). $rabbitmq_queue – The name of the RabbitMQ queue associated with the PEcAn workflow (default: pecan) $rabbitmq_username – The RabbitMQ username (default: guest) $rabbitmq_password – The RabbitMQ password (default: guest) In addition, for running models via RabbitMQ, you will also need to add an entry like the following to the config.php $hostlist: $hostlist=array($fqdn => array("rabbitmq" => "amqp://guest:guest@rabbitmq/%2F"), ...) This will set the hostname to the name of the current machine (defined by the $fqdn variable earlier in the config.php file) to an array with one entry, whose key is rabbitmq and whose value is the RabbitMQ URI (amqp://...). These values are converted into the appropriate entries in the pecan.xml in web/04-runpecan.php. 25.10.4 RabbitMQ in the PEcAn XML RabbitMQ is a special case of remote execution, so it is configured by the host node. An example RabbitMQ configuration is as follows: <host> <rabbitmq> <uri>amqp://guest:guest@rabbitmq/%2F</uri> <queue>sipnet_136</queue> </rabbitmq> </host> Here, uri and queue have the same general meanings as described in “producer”. Note that queue here refers to the target model. In PEcAn, RabbitMQ model queues are named as MODELTYPE_REVISION, so the example above refers to the SIPNET model version 136. Another example is ED2_git, referring to the latest git version of the ED2 model. 25.10.5 RabbitMQ configuration in Dockerfiles As described in the “consumer” section, our standard RabbitMQ receiver script is configured using three environment variables: RABBITMQ_URI, RABBITMQ_QUEUE, and APPLICATION. Therefore, configuring a container to work with PEcAn’s RabbitMQ instance requires setting these three variables in the Dockerfile using an ENV statement. For example, this excerpt from docker/base/Dockerfile.executor (for the pecan/executor image responsible for the PEcAn workflow) sets these variables as follows: ENV RABBITMQ_URI="amqp://guest:guest@rabbitmq/%2F" \\ RABBITMQ_QUEUE="pecan" \\ APPLICATION="R CMD BATCH workflow.R" Similarly, this excerpt from docker/models/Dockerfile.sipnet (which builds the SIPNET model image) is a typical example for a model image. Note the use of ARG here to specify a default version model version of 136 while allowing this to be configurable (via --build-arg MODEL_VERSION=X) at build time: ARG MODEL_VERSION=136 ENV APPLICATION="./job.sh" \\ MODEL_TYPE="SIPNET" \\ MODEL_VERSION="${MODEL_VERSION}" ENV RABBITMQ_QUEUE="${MODEL_TYPE}_${MODEL_VERSION}" WARNING: Dockerfile environment variables set via ENV are assigned all at once; they do not evaluate successively, left to right. Consider the following block: # Don't do this! ENV MODEL_TYPE="SIPNET" \\ MODEL_VERSION=136 \\ RABBITMQ_QUEUE=${MODEL_TYPE}_${MODEL_VERSION} # <- Doesn't know about MODEL_TYPE or MODEL_VERSION! In this block, the expansion for setting RABBITMQ_QUEUE is not aware of the current values of MODEL_TYPE or MODEL_VERSION, and will therefore be set incorrectly to just _ (unless they have been set previously, in which case it will be aware only of their earlier values). As such, variables depending on other variables must be set in a separate, subsequent ENV statement than the variables they depend on. 25.10.6 Case study: PEcAn web interface The following describes in general terms what happens during a typical run of the PEcAn web interface with RabbitMQ. The user initializes all containers with docker-compose up. All the services that interact with RabbitMQ (executor and all models) run receiver.py in the foreground, waiting for messages to tell them what to do. The user browses to http://pecan.localhost/pecan/ and steps through the web interface. All the pages up to the 04-runpecan.php run on the web container, and are primarily for setting up the pecan.xml file. Once the user starts the PEcAn workflow at 04-runpecan.php, the underlying PHP code connects to RabbitMQ (based on the URI provided in config.php) and posts the following message to the pecan queue: {"folder": "/workflows/PEcAn_WORKFLOWID", "workflowid": "WORKFLOWID"} The executor service, which is listening on the pecan queue, hears this message and executes its APPLICATION (R CMD BATCH workflow.R) in the working directory specified in the message’s folder. The executor service then performs the pre-execution steps (e.g. trait meta-analysis, conversions) itself. Then, to actually execute the model, executor posts the following message to the target model’s queue: {"folder": "/workflows/PEcAn_WORKFLOWID/run/RUNID"} The target model service, which is listening on its dedicated queue, hears this message and runs its APPLICATION, which is job.sh, in the directory indicated by the message. Upon exiting (normally), the model service writes its status into a file called rabbitmq.out in the same directory. The executor container continuously looks for the rabbitmq.out file as an indication of the model run’s status. Once it sees this file, it reads the status and proceeds with the post-execution parts of the workflow. (NOTE that this isn’t perfect. If the model running process exits abnormally, the rabbitmq.out file may not be created, which can cause the executor container to hang. If this happens, the solution is to restart the executor container with docker-compose restart executor). "],["pecan-remote.html", "26 Remote execution with PEcAn", " 26 Remote execution with PEcAn Remote execution allows the user to leverage the power and storage of high performance computing clusters, AWS instances, or specially configured virtual machines, but without leaving their local working environment. PEcAn uses remote execution primarily to run ecosystem models. The infrastructure for remote execution lives in the PEcAn.remote package (base/remote in the PEcAn repository). This section describes the following: Checking capabilities to connect to the remote machine correctly: Basics of command line SSH SSH authentication with keys and passwords Basics of SSH tunnels, and how they are used in PEcAn Description of PEcAn related tools that control remote execution Basic remote execution R functions in PEcAn.remote SETUP- Configuration Files and settings Remote model execution configuration in the pecan.xml and config.php Additional information about preparing remote servers for execution "],["basics-of-ssh.html", "26.1 Basics of SSH", " 26.1 Basics of SSH All of the PEcAn remote infrastructure depends on the system ssh utility, so it’s important to make sure this works before attempting the advanced remote execution functionality in PEcAn. To connect to a remote server interactively, the command is simply: ssh <username>@<hostname> For instance, my connection to the BU shared computing cluster looks like: ssh ashiklom@geo.bu.edu It will prompt me for my BU password, and, if successful, will drop me into a login shell on the remote machine. Alternatively to the login shell, ssh can be used to execute arbitrary code, whose output will be returned exactly as it would if you ran the command locally. For example, the following: ssh ashiklom@geo.bu.edu pwd will run the pwd command, and return the path to my home directory on the BU SCC. The more advanced example below will run some simple R code on the BU SCC and return the output as if it was run locally. ssh ashiklom@geo.bu.edu Rscript -e "seq(1, 10)" "],["ssh-authentication-password-vs.-ssh-key.html", "26.2 SSH authentication – password vs. SSH key", " 26.2 SSH authentication – password vs. SSH key Because this server uses passwords for authentication, this command will then prompt me for my password. An alternative to password authentication is using SSH keys. Under this system, the host machine (say, your laptop, or the PEcAn VM) has to generate a public and private key pair (using the ssh-keygen command). The private key (by default, a file in ~/.ssh/id_rsa) lives on the host machine, and should never be shared with anyone. The public key will be distributed to any remote machines to which you want the host to be able to connect. On each remote machine, the public key should be added to a list of authorized keys located in the ~/.ssh/authorized_keys file (on the remote machine). The authorized keys list indicates which machines (technically, which keys – a single machine, and even a single user, can have many keys) are allowed to connect to it. This is the system used by all of the PEcAn servers (pecan1, pecan2, test-pecan). "],["ssh-tunneling.html", "26.3 SSH tunneling", " 26.3 SSH tunneling SSH authentication can be more advanced than indicated above, especially on systems that require dual authentication. Even simple password-protection can be tricky in scripts, since (by design) it is fairly difficult to get SSH to accept a password from anything other than the raw keyboard input (i.e. SSH doesn’t let you pass passwords as input or arguments, because this exposes your password as plain text). A convenient and secure way to follow SSH security protocol, but prevent having to go through the full authentication process every time, is to use SSH tunnels (or “sockets”, which are effectively synonymous). Essentially, an SSH socket is a read- and write-protectected file that contains all of the information about an SSH connection. To create an SSH tunnel, use a command like the following: ssh -n -N -f -o ControlMaster=yes -S /path/to/socket/file <username>@<hostname> If appropriate, this will prompt you for your password (if using password authentication), and then will drop you back to the command line (thanks to the -N flag, which runs SSH without executing a command, the -f flag, which pushes SSH into the background, and the -n flag, which prevents ssh from reading any input). It will also create the file /path/to/socket/file. To use this socket with another command, use the -S /path/to/file flag, pointing to the same tunnel file you just created. ssh -S /path/to/socket/file <hostname> <optional command> This will let you access the server without any sort of authentication step. As before, if <optional command> is blank, you will be dropped into an interactive shell on the remote, or if it’s a command, that command will be executed and the output returned. To close a socket, use the following: ssh -S /path/to/socket/file <hostname> -O exit This will delete the socket file and close the connection. Alternatively, a scorched earth approach to closing the SSH tunnel if you don’t remember where you put the socket file is something like the following: pgrep ssh # See which processes will be killed pkill ssh # Kill those processes …which will kill all user processes called ssh. To automatically create tunnels following a specific pattern, you can add the following to your ~/.ssh/config Host <hostname goes here> ControlMaster auto ControlPath /tmp/%r@%h:%p For more information, see man ssh. "],["ssh-tunnels-and-pecan.html", "26.4 SSH tunnels and PEcAn", " 26.4 SSH tunnels and PEcAn Many of the PEcAn.remote functions assume that a tunnel is already open. If working from the web interface, the tunnel will be opened for you by some under-the-hood PHP and Bash code, but if debugging or working locally, you will have to create the tunnel yourself. The best way to do this is to create the tunnel first, outside of R, as described above. (In the following examples, I’ll use my username ashiklom connecting to the test-pecan server with a socket stored in /tmp/testpecan. To follow along, replace these with your own username and designated server, respectively). ssh -nNf -o ControlMaster=yes -S /tmp/testpecan ashiklom@test-pecan.bu.edu Then, in R, create a host object, which is just a list containing the elements name (hostname) and tunnel (path to tunnel file). my_host <- list(name = "test-pecan.bu.edu", tunnel = "/tmp/testpecan") This host object can then be used in any of the remote execution functions. "],["basic-remote-execute-functions.html", "26.5 Basic remote execute functions", " 26.5 Basic remote execute functions The PEcAn.remote::remote.execute.cmd function runs a system command on a remote server (or on the local server, if host$name == \"localhost\"). x <- PEcAn.remote::remote.execute.cmd(host = my_host, cmd = "echo", args = "Hello world") x Note that remote.execute.cmd is similar to base R’s system2, in that the base command (in this case, echo) is passed separately from its arguments (\"Hello world\"). Note also that the output of the remote command is returned as a character. For R code, there is a special wrapper around remote.execute.cmd – PEcAn.remote::remote.execute.R, which runs R code (passed as a string) on a remote and returns the output. code <- " x <- 2:4 y <- 3:1 x ^ y " out <- PEcAn.remote::remote.execute.R(code = code, host = my_host) For additional functions related to remote file operations and other stuff, see the PEcAn.remote package documentation. "],["remote-model-execution-with-pecan.html", "26.6 Remote model execution with PEcAn", " 26.6 Remote model execution with PEcAn The workhorse of remote model execution is the PEcAn.workflow::start_model_runs function, which distributes execution of each run in a list of runs (e.g. multiple runs in an ensemble) to the local machine or a remote based on the configuration in the PEcAn settings. Broadly, there are three major types of model execution: Serialized (PEcAn.remote::start_serial) – This runs models one at a time, directly on the local machine or remote (i.e. same as calling the executables one at a time for each run). Via a queue system, (PEcAn.remote::start_qsub) – This uses a queue management system, such as SGE (e.g. qsub, qstat) found on the BU SCC machines, to submit jobs. For computationally intensive tasks, this is the recommended way to go. Via a model launcher script (PEcAn.remote::setup_modellauncher) – This is a highly customizable approach where task submission is controlled by a user-provided script (launcher.sh). "],["xml-configuration.html", "26.7 XML configuration", " 26.7 XML configuration The relevant section of the PEcAn XML file is the <host> block. Here is a minimal example from one of my recent runs: <host> <name>geo.bu.edu</name> <user>ashiklom</user> <tunnel>/home/carya/output//PEcAn_99000000008/tunnel/tunnel</tunnel> </host> Breaking this down: name – The hostname of the machine where the runs will be performed. Set it to localhost to run on the local machine. user – Your username on the remote machine (note that this may be different from the username on your local machine). tunnel – This is the tunnel file for the connection used by all remote execution files. The tunnel is created automatically by the web interface, but must be created by the user for command line execution. This configuration will run in serialized mode. To use qsub, the configuration is slightly more involved: <host> <name>geo.bu.edu</name> <user>ashiklom</user> <qsub>qsub -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash</qsub> <qsub.jobid>Your job ([0-9]+) .*</qsub.jobid> <qstat>qstat -j @JOBID@ || echo DONE</qstat> <tunnel>/home/carya/output//PEcAn_99000000008/tunnel/tunnel</tunnel> </host> The additional fields are as follows: qsub – The command used to submit jobs to the queue system. Despite the name, this can be any command used for any queue system. The following variables are available to be set here: @NAME@ – Job name to display @STDOUT@ – File to which stdout will be redirected @STDERR@ – File to which stderr will be redirected qsub.jobid – A regular expression, from which the job ID will be determined. This string will be parsed by R as jobid <- gsub(qsub.jobid, \"\\\\1\", output) – note that the first pattern match is taken as the job ID. qstat – The command used to check the status of a job. Internally, PEcAn will look for the DONE string at the end, so a structure like <some command indicating if any jobs are still running> || echo DONE is required. The @JOBID@ here is the job ID determined from the qsub.jobid parsing. Documentation for using the model launcher is currently unavailable. "],["configuration-for-pecan-web-interface.html", "26.8 Configuration for PEcAn web interface", " 26.8 Configuration for PEcAn web interface The config.php has a few variables that will control where the web interface can run jobs, and how to run those jobs. It is located in the /web directory and if you have not touched it yet it will be named as config.example.php. Rename it to ’config.php` and edit by folowing the following directions. These variables are $hostlist, $qsublist, $qsuboptions, and $SSHtunnel. In the near future $hostlist, $qsublist, $qsuboptions will be combined into a single list. $SSHtunnel : points to the script that creates an SSH tunnel. The script is located in the web folder and the default value of dirname(__FILE__) . DIRECTORY_SEPARATOR . \"sshtunnel.sh\"; most likely will work. $hostlist : is an array with by default a single value, only allowing jobs to run on the local server. Adding any other servers to this list will show them in the pull down menu when selecting machines, and will trigger the web page to be show to ask for a username and password for the remote execution (make sure to use HTTPS setup when asking for password to prevent it from being send in the clear). $qsublist : is an array of hosts that require qsub to be used when running the models. This list can include $fqdn to indicate that jobs on the local machine should use qsub to run the models. $qsuboptions : is an array that lists options for each machine. Currently it support the following options (see also [Run Setup] and look at the tags) array("geo.bu.edu" => array("qsub" => "qsub -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash", "jobid" => "Your job ([0-9]+) .*", "qstat" => "qstat -j @JOBID@ || echo DONE", "job.sh" => "module load udunits R/R-3.0.0_gnu-4.4.6", "models" => array("ED2" => "module load hdf5")) In this list qsub is the actual command line for qsub, jobid is the text returned from qsub, qstat is the command to check to see if the job is finished. job.sh and the value in models are additional entries to add to the job.sh file generated to run the model. This can be used to make sure modules are loaded on the HPC cluster before running the actual model. "],["running-pecan-code-for-remotely.html", "26.9 Running PEcAn code for remotely", " 26.9 Running PEcAn code for remotely You do not need to download PEcAn fully on your remote machine. You can compile and install the model specific code pieces of PEcAn on the cluster easily without having to install the full code base of PEcAn (and all OS dependencies). Use the git clone command to: devtools::install_github("pecanproject/pecan", subdir = 'base/utils') Next we need to install the model specific pieces, this is done almost the same (example for ED2): devtools::install_github("pecanproject/pecan", subdir = 'models/ed') This should install dependencies required. The following are some notes on how to install the model specifics on different HPC clusters* "],["special-case-geo.bu.html", "26.10 Special case: geo.bu.edu", " 26.10 Special case: geo.bu.edu Following modules need to be loaded: module load hdf5 udunits R/R-3.0.0_gnu-4.4.6 Next the following packages need to be installed, otherwise it will fall back on the older versions install site-library install.packages(c('udunits2', 'lubridate'), configure.args=c(udunits2='--with-udunits2-lib=/project/earth/packages/udunits-2.1.24/lib --with-udunits2-include=/project/earth/packages/udunits-2.1.24/include')) Finally to install support for both ED and SIPNET: devtools::install_github("pecanproject/pecan", subdir = 'base/utils') devtools::install_github("pecanproject/pecan", subdir = 'models/sipnet') devtools::install_github("pecanproject/pecan", subdir = 'models/ed') "],["data-assimilation-with-dart.html", "27 Data assimilation with DART", " 27 Data assimilation with DART In addition to the state assimilation routines found in the assim.sequential module, another approach for state data assimilation in PEcAn is through the DART workflow created by the DARES group in NCAR. This section gives a straight-forward explanation how to implement DART, focused on the technical aspects of the implementation. If there are any questions, feel free to send @Viskari an email (tt.viskari@gmail.com) or contacting DART support as they are quite awesome in helping people with problems. Also, if there are any suggestions on how to improve the wiki, please let me know. Running with current folders in PEcAn Currently the DART folders in PEcAn are that you can simply copy the structure there over a downloaded DART workflow and it should replace/add relevant files and folders. The most important step after that is to check and change the run paths in the following files: Path_name files in the work folders T_ED2IN file, as it indicates where the run results be written. advance_model.csh, as it indicates where to copy files from/to. Second thing is setting the state vector size. This is explained in more detail below, but essentially this is governed by the variable model_size in model_mod.f90. In addition to this, it should be changed in utils/F2R.f90 and R2F.f90 programs, which are responsible for reading and writing state variable information for the different ensembles. This also will be expanded below. Finally, the new state vector size should be updated for any other executable that runs it. Third thing needed are the initial condition and observation sequence files. They will always follow the same format and are explained in more detail below. Finally the ensemble size, which is the easiest to change. In the work subfolder, there is a file named input.nml. Simply changing the ensemble size there will set it for the run itself. Also remember that initial conditions file should have the equal amount of state vectors as there are ensemble members. Adjusting the workflow The central file for the actual workflow is advance_model.csh. It is a script DART calls to determine how the state vector changes between the two observation times and is essentially the only file one needs to change when changing state models or observations operators. The file itself should be commented to give a good idea of the flow, but beneath is a crude order of events. 1. Create a temporary folder to run the model in and copy/link required files in to it. 2. Read in the state vector values and times from DART. Here it is important to note that the values will be in binary format, which need to be read in by a Fortran program. In my system, there is a program called F2R which reads in the binary values and writes out in ascii form the state vector values as well as which ED2 history files it needs to copy based on the time stamps. 3. Run the observation operator, which writes the state vector state in to the history files and adjusts them if necessary. 4. Run the program. 5. Read the new state vector values from output files. 6. Convert the state vector values to the binary. In my system, this is done by the program R2F. Initial conditions file The initial conditions file, commonly named filter_ics although you can set it to something else in input.nml, is relatively simple in structure. It has one sequence repeating over the number of ensemble members. First line contains two times: Seconds and days. Just use one of them in this situation, but it has to match the starting time given in input.nml. After that each line should contain a value from the state vector in the order you want to treat them. R functions filter_ics.R and B_filter_ics.R in the R folder give good examples of how to create these. Observations files The file which contains the observations is commonly known as obs_seq.out, although again the name of the file can be changed in input.nml. The structure of the file is relatively straight-forward and the R function ObsSeq.R in the R subfolder has the write structure for this. Instead of writing it out here, I want to focus on a few really important details in this file. Each observations will have a time, a value, an uncertainty, a location and a kind. The first four are self-explanatory, but the kind is really important, but also unfortunately really easy to misunderstand. In this file, the kind does not refer to a unit or a type of observation, but which member of the state vector is this observation of. So if the kind was, for example, 5, it would mean that it was of the fifth member of the state vector. However, if the kind value is positive, the system assumes that there is some sort of an operator change in comparing the observation and state vector value which is specified in a subprogram in model_mod.f90. So for an direct identity comparison between the observation and the state vector value, the kind needs to be negative number of the state vector component. Thus, again if the observation is of the fifth state vector value, the kind should be set as -5. Thus it is recommendable that the state vector values have already been altered to be comparable with the observations. As for location, there are many ways to set in DART and the method needs to be chosen when compiling the code by giving the program which of the location mods it is to use. In our examples we used a 1-dimensional location vector with scaled values between 0 and 1. For future it makes sense to switch to a 2 dimensional long- and lat-scale, but for the time being the location does not impact the system a lot. The main impact will be if the covariances will be localized, as that will be decided on their locations. State variable vector in DART Creating/adjusting a state variable vector in DART is relatively straight-forward. Below are listed the steps to specify a state variable vector in DART. I. For each specific model, there should be an own folder within the DART root models folder. In this folder there is a model_mod.f90, which contains the model specific subroutines necessary for a DART run. At the beginning of this file there should be the following line: integer, parameter :: model_size = [number] The number here should be the number of variables in the vector. So for example if there were three state variables, then the line should look like this: integer, parameter :: model_size = 3 This number should also be changed to match with any of the other executables called during the run as indicated by the list above. In the DART root, there should be a folder named obs_kind, which contains a file called DEFAULT_obs_kind_mod.F90. It is important to note that all changes should be done to this file instead of obs_kind_mod.f90, as during compilation DART creates obs_kind_mod.f90 from DEFAULT_obs_kind_mod.F90. This program file contains all the defined observation types used by DART and numbers them for easier reference later. Different types are classified according to observation instrument or relevant observation phenomenon. Adding a new type only requires finding an unused number and starting a new identifying line with the following: integer, parameter, public :: & KIND_… Note that the observation kind should always be easy to understand, so avoid using unnecessary acronyms. For example, when adding an observation type for Leaf Area Index, it would look like below: integer, parameter, public :: & KIND_LEAF_AREA_INDEX = [number] In the DART root, there should be a folder named obs_def, which contains several files starting with obs_def_. There files contain the different available observation kinds classified either according to observation instrument or observable system. Each file starts with the line ! BEGIN DART PREPROCESS KIND LIST And end with line ! END DART PREPROCESS KIND LIST The lines between these two should contain ! The desired observation reference, the observation type, COMMON_CODE. For example, for observations relating to phenology, I have created a file called obs_def_phen_mod.f90. In this file I define the Leaf Area Index observations in the following way. ! BEGIN DART PREPROCESS KIND LIST ! LAI, TYPE_LEAF_AREA_INDEX, COMMON_CODE ! END DART PREPROCESS KIND LIST Note that the exclamation marks are necessary for the file. In the model specific folder, in the work subfolder there is a namelist file input.nml. This contains all the run specific information for DART. In it, there is a subtitle &preprocess, under which there is a line input_files = ‘….’ This input_files sections must be set to refer to the obs_def file created in step III. The input files can contain references to multiple obs_def files if necessary. As an example, the reference to the obs_def_phen_mod.f90 would look like input_files = ‘../../../obs_def/obs_def_phen_mod.f90’ V. Finally, as an optional step, the different values in state vector can be typed. In model_mod, referred to in step I, there is a subroutine get_state_meta_data. In it, there is an input variable index_in, which refers to the vector component. So for instance for the second component of the vector index_in would be 2. If this is done, the variable kind has to be also included at the beginning of the model_mod.f90 file, at the section which begins use obs_kind_mod, only :: The location of the variable can be set, but for a 0-dimensional model we are discussing here, this is not necessary. Here, though, it is possible to set the variable types by including the following line if(index_in .eq. [number]) var_type = [One of the variable kinds set in step II] If the length of the state vector is changed, it is important that the script ran with DART produces a vector of that length. Change appropriately if necessary. After these steps, DART should be able to run with the state vector of interest. "],["miscellaneous.html", "28 Miscellaneous ", " 28 Miscellaneous "],["todo.html", "28.1 TODO", " 28.1 TODO Add list of developers "],["using-the-pecan-download_file-function.html", "28.2 Using the PEcAn download_file() function", " 28.2 Using the PEcAn download_file() function download_file(url, destination_file, method) This custom PEcAn function works together with the base R function download.file (https://stat.ethz.ch/R-manual/R-devel/library/utils/html/download.file.html). However, it provides expanded functionality to generalize the use for a broad range of environments. This is because some computing environments are behind a firewall or proxy, including FTP firewalls. This may require the use of a custom FTP program and/or initial proxy server authentication to retrieve the files needed by PEcAn (e.g. meteorology drivers, other inputs) to run certain model simulations or tools. For example, the Brookhaven National Laboratory (BNL) requires an initial connection to a FTP proxy before downloading files via FTP protocol. As a result, the computers running PEcAn behind the BNL firewall (e.g. https://modex.bnl.gov) use the ncftp cleint (http://www.ncftp.com/) to download files for PEcAn because the base options with R::base download.file() such as curl, libcurl which don’t have the functionality to provide credentials for a proxy or even those such as wget which do but don’t easily allow for connecting through a proxy server before downloading files. The current option for use in these instances is ncftp, specifically ncftpget Examples: HTTP download_file("http://lib.stat.cmu.edu/datasets/csb/ch11b.txt","~/test.download.txt") FTP download_file("ftp://ftp.cdc.noaa.gov/Datasets/NARR/monolevel/pres.sfc.2000.nc", "~/pres.sfc.2000.nc") customizing to use ncftp when running behind an FTP firewall (requires ncftp to be installed and availible) download_file("ftp://ftp.cdc.noaa.gov/Datasets/NARR/monolevel/pres.sfc.2000.nc", "~/pres.sfc.2000.nc", method=""ncftpget") On modex.bnl.gov, the ncftp firewall configuration file (e.g. ~/.ncftp/firewall) is configured as: firewall-type=1 firewall-host=ftpgateway.sec.bnl.local firewall-port=21 which then allows for direct connection through the firewall using a command like: ncftpget ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-fortran-4.4.4.tar.gz To allow the use of ncftpget from within the download.file() function you need to set your R profile download.ftp.method option in your options list. To see your current R options run options() from R cmd, which should look something like this: > options() $add.smooth [1] TRUE $bitmapType [1] "cairo" $browser [1] "/usr/bin/xdg-open" $browserNLdisabled [1] FALSE $CBoundsCheck [1] FALSE $check.bounds [1] FALSE $citation.bibtex.max [1] 1 $continue [1] "+ " $contrasts unordered ordered "contr.treatment" "contr.poly" In order to set your download.ftp.method option you need to add a line such as # set default FTP options(download.ftp.method = "ncftpget") In your ~/.Rprofile. On modex at BNL we have set the global option in /usr/lib64/R/etc/Rprofile.site. Once this is done you should be able to see the option set using this command in R: > options("download.ftp.method") $download.ftp.method [1] "ncftpget" "],["faq.html", "29 FAQ", " 29 FAQ "],["pecan-project-used-in-courses.html", "30 PEcAn Project Used in Courses ", " 30 PEcAn Project Used in Courses "],["university-classes.html", "30.1 University classes", " 30.1 University classes 30.1.1 GE 375 - Environmental Modeling - Spring 2013, 2014 (Mike Dietze, Boston University) The final “Case Study: Terrestrial Ecosystem Models” is a PEcAn-based hands-on activity. Each class has been 25 students. GE 585 - Ecological forecasting Fall 2013 (Mike Dietze, Boston University) "],["summer-courses-workshops.html", "30.2 Summer Courses / Workshops", " 30.2 Summer Courses / Workshops 30.2.1 Annual summer course in flux measurement and advanced modeling (Mike Dietze, Ankur Desai) Niwot Ridge, CO About 1/3 lecture, 2/3 hands-on (the syllabus is actually wrong as it list the other way around). Each class has 24 students. 2013 Syllabus see Tuesday Week 2 Data Assimilation lectures and PEcAn demo and the Class projects and presentations on Thursday and Friday. (Most students use PEcAn for their group projects. 2014 will be the third year that PEcAn has been used for this course. 30.2.2 Assimilating Long-Term Data into Ecosystem Models: Paleo-Ecological Observatory Network (PalEON) Project Here is a link to the course: https://www3.nd.edu/~paleolab/paleonproject/summer-course/ This course uses the same demo as above, including collecting data in the field and assimilating it (part 3) 30.2.3 Integrating Evidence on Forest Response to Climate Change: Physiology to Regional Abundance http://blue.for.msu.edu/macrosystems/workshop May 13-14, 2013 Session 4: Integrating Forest Data Into Ecosystem Models 30.2.4 Ecological Society of America meetings Workshop: Combining Field Measurements and Ecosystem Models "],["selected-publications.html", "30.3 Selected Publications", " 30.3 Selected Publications Dietze, M.C., D.S LeBauer, R. Kooper (2013) On improving the communication between models and data. Plant, Cell, & Environment doi:10.1111/pce.12043 LeBauer, D.S., D. Wang, K. Richter, C. Davidson, & M.C. Dietze. (2013). Facilitating feedbacks between field measurements and ecosystem models. Ecological Monographs. doi:10.1890/12-0137.1 "],["package-dependencies.html", "31 Package Dependencies ", " 31 Package Dependencies "],["executive-summary-what-to-usually-do.html", "31.1 Executive Summary: What to usually do", " 31.1 Executive Summary: What to usually do When you’re editing one PEcAn package and want to use a function from any other R package (including other PEcAn packages), the standard method is to add the other package to the Imports: field of your DESCRIPTION file, spell the function in fully namespaced form (pkg::function()) everywhere you call it, and be done. There are a few cases where this isn’t enough, but they’re rarer than you think. The rest of this section mostly deals with the exceptions to this rule and why not to use them when they can be avoided. "],["big-picture-whats-possible-to-do.html", "31.2 Big Picture: What’s possible to do", " 31.2 Big Picture: What’s possible to do To make one PEcAn package use functionality from another R package (including other PEcAn packages), you must do at least one and up to four things in your own package. Always, declare which packages your package depends on, so that R can install them as needed when someone installs your package and so that human readers can understand what additional functionality it uses. Declare dependencies by manually adding them to your package’s DESCRIPTION file. Sometimes, import functions from the dependency package into your package’s namespace, so that your functions know where to find them. This is only sometimes necessary, because you can usually use :: to call functions without importing them. Import functions by writing Roxygen @importFrom statements and do not edit the NAMESPACE file by hand. Rarely, load dependency code into the R environment, so that the person using your package can use it without loading it separately. This is usually a bad idea, has caused many subtle bugs, and in PEcAn it should only be used when unavoidable. When unavoidable, prefer requireNamespace(... quietly = TRUE) over Depends: or require() or library(). Only if your dependency relies on non-R tools, install any components that R won’t know how to find for itself. These components are often but not always identifiable from a SystemRequirements field in the dependency’s DESCRIPTION file. The exact installation procedure will vary case by case and from one operating system to another, and for PEcAn the key point is that you should skip this step until it proves necessary. When it does prove necessary, edit the documentation for your package to include advice on installing the dependency components, then edit the PEcAn build and testing scripts as needed so that they follow your advice. The advice below about each step is written specifically for PEcAn, although much of it holds for R packages in general. For more details about working with dependencies, start with Hadley Wickham’s R packages and treat the CRAN team’s Writing R Extensions as the final authority. "],["declaring-dependencies-depends-suggests-imports.html", "31.3 Declaring Dependencies: Depends, Suggests, Imports", " 31.3 Declaring Dependencies: Depends, Suggests, Imports List all dependencies in the DESCRIPTION file. Every package that is used by your package’s code must appear in exactly one of the sections Depends, Imports, or Suggests. Please list packages in alphabetical order within each section. R doesn’t care about the order, but you will later when you’re trying to check whether this package uses a particular dependency. Imports is the correct place to declare most PEcAn dependencies. This ensures that they get installed, but does not automatically import any of their functions – Since PEcAn style prefers to mostly use :: instead of importing, this is what we want. Depends is, despite the name, usually the wrong place to declare PEcAn dependencies. The only difference between Depends and Imports is that when the user attaches your packages to their own R workspace (e.g. using library(\"PEcAn.yourpkg\")), the packages in Depends are attached as well. Notice that a call like PEcAn.yourpkg::yourfun() will not attach your package or its dependencies, so your code still needs to import or ::-qualify all functions from packages listed in Depends. In short, Depends is not a shortcut, is for user convenience not developer convenience, and makes it easy to create subtle bugs that appear to work during interactive test sessions but fail when run from scripts. As the R extensions manual puts it (emphasis added): This [Imports and Depends] scheme was developed before all packages had namespaces (R 2.14.0 in October 2011), and good practice changed once that was in place. Field ‘Depends’ should nowadays be used rarely, only for packages which are intended to be put on the search path to make their facilities available to the end user (and not to the package itself).” The Suggests field can be used to declare dependencies on packages that make your package more useful but are not completely essential. Again from the R extensions manual: The Suggests field uses the same syntax as Depends and lists packages that are not necessarily needed. This includes packages used only in examples, tests or vignettes (see Writing package vignettes), and packages loaded in the body of functions. E.g., suppose an example from package foo uses a dataset from package bar. Then it is not necessary to have bar use foo unless one wants to execute all the examples/tests/vignettes: it is useful to have bar, but not necessary. Some of the PEcAn model interface packages push this definition of “not necessarily needed” by declaring their coupled model package in Suggests rather than Imports. For example, the PEcAn.BIOCRO package cannot do anything useful when the BioCro model is not installed, but it lists BioCro in Suggests because PEcAn as a whole can work without it. This is a compromise to simplify installation of PEcAn for users who only plan to use a few models, so that they can avoid the bother of installing BioCro if they only plan to run, say, SIPNET. Since the point of Suggests is that they are allowed to be missing, all code that uses a suggested package must behave reasonably when the package is not found. Depending on the situation, “reasonably” could mean checking whether the package is available and throwing an error as needed (PEcAn.BIOCRO uses its .onLoad function to check at load time whether BioCro is installed and will refuse to load if it is not), or providing an alternative behavior (PEcAn.data.atmosphere::get_NARR_thredds checks at call time for either parallel or doParallel and uses whichever one it finds first), or something else, but your code should never just assume that the suggested package is available. You are not allowed to import functions from Suggests into your package’s namespace, so always call them in ::-qualified form. By default R will not install suggested packages when your package is installed, but users can change this using the dependencies argument of install.packages. Note that for testing on Travis CI, PEcAn does install all Suggests (because they are required for full package checks), so any of your code that runs when a suggested package is not available will never be exercised by Travis checks. It is often tempting to move a dependency from Imports to Suggests because it is a hassle to install (large, hard to compile, no longer available from CRAN, currently broken on GitHub, etc), in the hopes that this will isolate the rest of PEcAn from the troublesome dependency. This helps for some cases, but fails for two very common ones: It does not reduce install time for CI builds, because all suggested packages need to be present when running full package checks (R CMD check or devtools::check or make check). It also does not prevent breakage when updating PEcAn via make install, because devtools::install_deps does not install suggested packages that are missing but does try to upgrade any that are already installed to the newest available version – even if the installed version took ages to compile and would have worked just fine! "],["importing-functions-use-roxygen.html", "31.4 Importing Functions: Use Roxygen", " 31.4 Importing Functions: Use Roxygen PEcAn style is to import very few functions and instead use fully namespaced function calls (pkg::fn()) everywhere it’s practical to do so. In cases where double-colon notation would be especially burdensome, such as when importing custom binary operators like %>%, it’s acceptable to import specific functions into the package namespace. Do this by using Roxygen comments of the form #' @importFrom pkg function, not by hand-editing the NAMESPACE file. If the import is only used in one or a few functions, use an @importFrom in the documentation for each function that uses it. If it is used all over the package, use a single @importFrom in the Roxygen documentation block for the whole package, which probably lives in a file called either zzz.R or <your_package_name>-package.R: #' What your package does #' #' Longer description of the package goes here. #' Probably with links to other resources about it, citations, etc. #' #' @docType package #' @name PEcAn.yourpkg #' @importFrom magrittr %>% NULL Roxygen will make sure there’s only one NAMESPACE entry per imported function no matter how many importFrom statements there are, but please pick a scheme (either import on every usage or once for the whole package), stick with it, and do not make function x() rely on an importFrom in the comments above function y(). Please do not import entire package namespaces (#' @import pkg); it increases the chance of function name collisions and makes it much harder to understand which package a given function was called from. A special note about importing functions from the tidyverse: Be sure to import from the package(s) that actually contain the functions you want to use, e.g. Imports: dplyr, magrittr, purrr / @importFrom magrittr %>% / purrr::map(...), not Imports: tidyverse / @importFrom tidyverse %>% / tidyverse::map(...). The package named tidyverse is just a interactive shortcut that loads the whole collection of constituent packages; it doesn’t export any functions in its own namespace and therefore importing it into your package doesn’t make them available. "],["loading-code-dont-but-use-requirenamespace-when-you-do.html", "31.5 Loading Code: Don’t… But Use requireNamespace When You Do", " 31.5 Loading Code: Don’t… But Use requireNamespace When You Do The very short version of this section: We want to maintain clear separation between the package’s namespace (which we control and want to keep predictable) and the global namespace (which the user controls, might change in ways we have no control over, and will be justifiably angry if we change it in ways they were not expecting). Therefore, avoid attaching packages to the search path (so no Depends and no library() or require() inside functions), and do not explicitly load other namespaces if you can help it. The longer version requires that we make a distinction often glossed over: Loading a package makes it possible for R to find things in the package namespace and does any actions needed to make it ready for use (e.g. running its .onLoad method, loading DLLs if the package contains compiled code, etc). Attaching a package (usually by calling library(\"somePackage\")) loads it if it wasn’t already loaded, and then adds it to the search path so that the user can find things in its namespace. As discussed in the “Declaring Dependancies” section above, dependencies listed in Depends will be attached when your package is attached, but they will be neither attached nor loaded when your package is loaded without being attached. Loading a dependency into the package namespace is undesirable because it makes it hard to understand our own code – if we need to use something from elsewhere, we’d prefer call it from its own namespace using :: (which implicitly loads the dependency!) or explicitly import it with a Roxygen @import directive. But in a few cases this isn’t enough. The most common reason to need to explicitly load a dependency is that some packages define new S3 methods for generic functions defined in other packages, but do not export these methods directly. We would prefer that these packages did not do this, but sometimes we have to use them anyway. An example from PEcAn is that PEcAn.MA needs to call as.matrix on objects of class mcmc.list. When the coda namespace is loaded, as.matrix(some_mcmc.list) can be correctly dispatched by base::as.matrix to the unexported method coda:::as.matrix.mcmc.list, but when coda is not loaded this dispatch will fail. Unfortunately coda does not export as.matrix.mcmc.list so we cannot call it directly or import it into the PEcAn.MA namespace, so instead we load the coda namespace whenever PEcAn.MA is loaded. Attaching packages to the user’s search path is even more problematic because it makes it hard for the user to understand how your package will affect their own code. Packages attached by a function stay attached after the function exits, so they can cause name collisions far downstream of the function call, potentially in code that has nothing to do with your package. And worse, since names are looked up in reverse order of package loading, it can cause behavior that differs in strange ways depending on the order of lines that look independent of each other: library(Hmisc) x = ... y = 3 summarize(x) # calls Hmisc::summarize y2 <- some_package_that_attaches_dplyr::innocent.looking.function(y) # Loading required package: dplyr summarize(x) # Looks identical to previous summarize, but calls dplyr::summarize! This is not to say that users will never want your package to attach another one for them, just that it’s rare and that attaching dependencies is much more likely to cause bugs than to fix them and additionally doesn’t usually save the package author any work. One possible exception to the do-not-attach-packages rule is a case where your dependency ignores all good practice and wrongly assumes, without checking, that all of its own dependencies are attached; if its DESCRIPTION uses only Depends instead of Imports, this is often a warning sign. For example, a small-but-surprising number of packages depend on the methods package without proper checks (this is probably because most but not all R interpreters attach methods by default and therefore it’s easy for an author to forget it might ever be otherwise unless they happen to test with a but-not-all interpreter). If you find yourself with a dependency that does this, accept first that you are relying on a package that is broken, and you should either convince its maintainer to fix it or find a way to remove the dependency from PEcAn. But as a short-term workaround, it is sometimes possible for your code to attach the direct dependency so that it will behave right with regard to its secondary dependencies. If so, make sure the attachment happens every time your package is loaded (e.g. by calling library(depname) inside your package’s .onLoad method) and not just when your package is attached (e.g. by putting it in Depends). When you do need to load or attach a dependency, it is probably better to do it inside your package’s .onLoad method rather than in individual functions, but this isn’t ironclad. To only load, use requireNamespace(pkgname, quietly=TRUE) – this will make it available inside your package’s namespace while avoiding (most) annoying loadtime messages and not disturbing the user’s search path. To attach when you really can’t avoid it, declare the dependency in Depends and also attach it using library(pkgname) in your .onLoad method. Note that scripts in inst/ are considered to be sample code rather than part of the package namespace, so it is acceptable for them to explicitly attach packages using library(). You may also see code that uses require(pkgname); this is just like library, but returns FALSE instead of erroring if package load fails. It is OK for scripts in inst/ that can do and do do something useful when a dependency is missing, but if it is used as if(!require(pkg)){ stop(...)} then replace it with library(pkg). If you think your package needs to load or attach code for any reason, please note why in your pull request description and be prepared for questions about it during code review. If your reviewers can think of an alternate approach that avoids loading or attaching, they will likely ask you to use it even if it creates extra work for you. "],["installing-dependencies-let-the-machines-do-it.html", "31.6 Installing dependencies: Let the machines do it", " 31.6 Installing dependencies: Let the machines do it In most cases you won’t need to think about how dependencies get installed – just declare them in your package’s DESCRIPTION and the installation will be handled automatically by R and devtools during the build process. The main exception is when a dependency relies on non-R software that R does not know how to install automatically. For example, rjags relies on JAGS, which might be installed in a different place on every machine. If your dependency falls in this category, you will know (because your CI builds will keep failing until you figure out how to fix it), but the exact details of the fix will differ for every case. "],["appendix-testthat.html", "32 Testing with the testthat package", " 32 Testing with the testthat package Tests are found in <package>/tests/testthat/ (for example, base/utils/inst/tests/) See attached file and http://r-pkgs.had.co.nz/tests.html for details on how to use the testthat package. "],["list-of-expectations.html", "32.1 List of Expectations", " 32.1 List of Expectations Full Abbreviation expect_that(x, is_true()) expect_true(x) expect_that(x, is_false()) expect_false(x) expect_that(x, is_a(y)) expect_is(x, y) expect_that(x, equals(y)) expect_equal(x, y) expect_that(x, is_equivalent_to(y)) expect_equivalent(x, y) expect_that(x, is_identical_to(y)) expect_identical(x, y) expect_that(x, matches(y)) expect_matches(x, y) expect_that(x, prints_text(y)) expect_output(x, y) expect_that(x, shows_message(y)) expect_message(x, y) expect_that(x, gives_warning(y)) expect_warning(x, y) expect_that(x, throws_error(y)) expect_error(x, y) "],["basic-use-of-the-testthat-package.html", "32.2 Basic use of the testthat package", " 32.2 Basic use of the testthat package Create a file called <packagename>/tests/testthat.R with the following contents: library(testthat) library(mypackage) test_check("mypackage") Tests should be placed in <packagename>/tests/testthat/test-<sourcefilename>.R, and look like the following: test_that("mathematical operators plus and minus work as expected",{ expect_equal(sum(1,1), 2) expect_equal(sum(-1,-1), -2) expect_equal(sum(1,NA), NA) expect_error(sum("cat")) set.seed(0) expect_equal(sum(matrix(1:100)), sum(data.frame(1:100))) }) test_that("different testing functions work, giving excuse to demonstrate",{ expect_identical(1, 1) expect_identical(numeric(1), integer(1)) expect_equivalent(numeric(1), integer(1)) expect_warning(mean('1')) expect_that(mean('1'), gives_warning("argument is not numeric or logical: returning NA")) expect_warning(mean('1'), "argument is not numeric or logical: returning NA") expect_message(message("a"), "a") }) "],["data-for-tests.html", "32.3 Data for tests", " 32.3 Data for tests Many of PEcAn’s functions require inputs that are provided as data. These can be in the data or the inst/extdata folders of a package. Data that are not package specific should be placed in the PEcAn.all (base/all) or PEcAn.utils (base/utils) packages. Some useful conventions: "],["settings-1.html", "32.4 Settings", " 32.4 Settings A generic settings can be found in the PEcAn.all package settings.xml <- system.file("pecan.biocro.xml", package = "PEcAn.BIOCRO") settings <- read.settings(settings.xml) database settings can be specified, and tests run only if a connection is available We currently use the following database to run tests against; tests that require access to a database should check db.exists() and be skipped if it returns FALSE to avoid failed tests on systems that do not have the database installed. settings$database <- list(userid = "bety", passwd = "bety", name = "bety", # database name host = "localhost" # server name) test_that(..., { skip_if_not(db.exists(settings$database)) ## write tests here }) instructions for installing this are available on the VM creation wiki examples can be found in the PEcAn.DB package (base/db/tests/testthat/). Model specific settings can go in the model-specific module, for example: settings.xml <- system.file("extdata/pecan.biocro.xml", package = "PEcAn.BIOCRO") settings <- read.settings(settings.xml) test-specific settings: settings text can be specified inline: settings.text <- " <pecan> <nocheck>nope</nocheck> ## allows bypass of checks in the read.settings functions <pfts> <pft> <name>ebifarm.pavi</name> <outdir>test/</outdir> </pft> </pfts> <outdir>test/</outdir> <database> <userid>bety</userid> <passwd>bety</passwd> <location>localhost</location> <name>bety</name> </database> </pecan>" settings <- read.settings(settings.text) values in settings can be updated: settings <- read.settings(settings.text) settings$outdir <- "/tmp" ## or any other settings "],["helper-functions-for-unit-tests.html", "32.5 Helper functions for unit tests", " 32.5 Helper functions for unit tests PEcAn.utils::tryl returns FALSE if function gives error PEcAn.utils::temp.settings creates temporary settings file PEcAn.DB::db.exists returns TRUE if connection to database is available "],["developer-devtools.html", "33 devtools package", " 33 devtools package Provides functions to simplify development Documentation: The R devtools package load_all("pkg") document("pkg") test("pkg") install("pkg") build("pkg") other tips for devtools (from the documentation): Adding the following to your ~/.Rprofile will load devtools when running R in interactive mode: # load devtools by default if (interactive()) { suppressMessages(require(devtools)) } Adding the following to your .Rpackages will allow devtools to recognize package by folder name, rather than directory path # in this example, devhome is the pecan trunk directory devhome <- "/home/dlebauer/R-dev/pecandev/" list( default = function(x) { file.path(devhome, x, x) }, "utils" = paste(devhome, "pecandev/utils", sep = "") "common" = paste(devhome, "pecandev/common", sep = "") "all" = paste(devhome, "pecandev/all", sep = "") "ed" = paste(devhome, "pecandev/models/ed", sep = "") "uncertainty" = paste(devhome, "modules/uncertainty", sep = "") "meta.analysis" = paste(devhome, "modules/meta.analysis", sep = "") "db" = paste(devhome, "db", sep = "") ) Now, devtools can take pkg as an argument instead of /path/to/pkg/, e.g. so you can use build(\"pkg\") instead of build(\"/path/to/pkg/\") "],["models_singularity.html", "34 singularity", " 34 singularity Running a model using singularity. This is work in progress. This assumes you have singulariy already installed. This will work on a Linux machine (x86_64). First make sure you have all the data files: bash script to install required files (click to expand) #!/bin/bash if [ ! -e sites ]; then curl -s -o sites.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/sites.tgz tar zxf sites.tgz sed -i -e "s#/home/kooper/Projects/EBI#/data/sites#" sites/*/ED_MET_DRIVER_HEADER rm sites.tgz fi if [ ! -e inputs ]; then curl -s -o inputs.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/inputs.tgz tar zxf inputs.tgz rm inputs.tgz fi if [ ! -e testrun.s83 ]; then curl -s -o testrun.s83.zip http://isda.ncsa.illinois.edu/~kooper/EBI/testrun.s83.zip unzip -q testrun.s83.zip sed -i -e "s#/home/pecan#/data#" testrun.s83/ED2IN rm testrun.s83.zip fi if [ ! -e ${HOME}/sites/Santarem_Km83 ]; then curl -s -o Santarem_Km83.zip http://isda.ncsa.illinois.edu/~kooper/EBI/Santarem_Km83.zip unzip -q -d sites Santarem_Km83.zip sed -i -e "s#/home/pecan#/data#" sites/Santarem_Km83/ED_MET_DRIVER_HEADER rm Santarem_Km83.zip fi Next edit the ED2IN file in testrun.s83 ED2IN file (click to expand) !==========================================================================================! !==========================================================================================! ! ED2IN . ! ! ! ! This is the file that contains the variables that define how ED is to be run. There ! ! is some brief information about the variables here. ! !------------------------------------------------------------------------------------------! $ED_NL !----- Simulation title (64 characters). -----------------------------------------------! NL%EXPNME = 'ED version 2.1 test' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Type of run: ! ! INITIAL -- Starts a new run, that can be based on a previous run (restart/history), ! ! but then it will use only the biomass and soil carbon information. ! ! HISTORY -- Resumes a simulation from the last history. This is different from ! ! initial in the sense that exactly the same information written in the ! ! history will be used here. ! !---------------------------------------------------------------------------------------! NL%RUNTYPE = 'INITIAL' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Start of simulation. Information must be given in UTC time. ! !---------------------------------------------------------------------------------------! NL%IMONTHA = 01 NL%IDATEA = 01 NL%IYEARA = 2001 NL%ITIMEA = 0000 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! End of simulation. Information must be given in UTC time. ! !---------------------------------------------------------------------------------------! NL%IMONTHZ = 01 NL%IDATEZ = 01 NL%IYEARZ = 2002 NL%ITIMEZ = 0000 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! DTLSM -- Time step to integrate photosynthesis, and the maximum time step for ! ! integration of energy and water budgets (units: seconds). Notice that the ! ! model will take steps shorter than this if this is too coarse and could ! ! lead to loss of accuracy or unrealistic results in the biophysics. ! ! Recommended values are < 60 seconds if INTEGRATION_SCHEME is 0, and 240-900 ! ! seconds otherwise. ! ! RADFRQ -- Time step to integrate radiation, in seconds. This must be an integer ! ! multiple of DTLSM, and we recommend it to be exactly the same as DTLSM. ! !---------------------------------------------------------------------------------------! NL%DTLSM = 600. NL%RADFRQ = 600. !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are used in case the user wants to run a regional run. ! ! ! ! N_ED_REGION -- number of regions for which you want to run ED. This can be set to ! ! zero provided that N_POI is not... ! ! GRID_TYPE -- which kind of grid to run: ! ! 0. Longitude/latitude grid ! ! 1. Polar-stereographic ! !---------------------------------------------------------------------------------------! NL%N_ED_REGION = 0 NL%GRID_TYPE = 1 !------------------------------------------------------------------------------------! ! The following variables are used only when GRID_TYPE is set to 0. You must ! ! provide one value for each grid, except otherwise noted. ! ! ! ! GRID_RES -- Grid resolution, in degrees (first grid only, the other grids ! ! resolution will be defined by NSTRATX/NSTRATY). ! ! ED_REG_LATMIN -- Southernmost point of each region. ! ! ED_REG_LATMAX -- Northernmost point of each region. ! ! ED_REG_LONMIN -- Westernmost point of each region. ! ! ED_REG_LONMAX -- Easternmost point of each region. ! !------------------------------------------------------------------------------------! NL%GRID_RES = 1.0 NL%ED_REG_LATMIN = -12.0, -7.5, 10.0, -6.0 NL%ED_REG_LATMAX = 1.0, -3.5, 15.0, -1.0 NL%ED_REG_LONMIN = -66.0,-58.5, 70.0, -63.0 NL%ED_REG_LONMAX = -49.0,-54.5, 35.0, -53.0 !------------------------------------------------------------------------------------! !------------------------------------------------------------------------------------! ! The following variables are used only when GRID_TYPE is set to 1. ! ! ! ! NNXP -- number of points in the X direction. One value for each grid. ! ! NNYP -- number of points in the Y direction. One value for each grid. ! ! DELTAX -- grid resolution in the X direction, near the grid pole. Units: [ m]. ! ! this value is used to define the first grid only, other grids are ! ! defined using NNSTRATX. ! ! DELTAY -- grid resolution in the Y direction, near the grid pole. Units: [ m]. ! ! this value is used to define the first grid only, other grids are ! ! defined using NNSTRATX. Unless you are running some specific tests, ! ! both DELTAX and DELTAY should be the same. ! ! POLELAT -- Latitude of the pole point. Set this close to CENTLAT for a more ! ! traditional "square" domain. One value for all grids. ! ! POLELON -- Longitude of the pole point. Set this close to CENTLON for a more ! ! traditional "square" domain. One value for all grids. ! ! CENTLAT -- Latitude of the central point. One value for each grid. ! ! CENTLON -- Longitude of the central point. One value for each grid. ! !------------------------------------------------------------------------------------! NL%NNXP = 110 NL%NNYP = 70 NL%DELTAX = 60000. NL%DELTAY = 60000. NL%POLELAT = -2.609075 NL%POLELON = -60.2093 NL%CENTLAT = -2.609075 NL%CENTLON = -60.2093 !------------------------------------------------------------------------------------! !------------------------------------------------------------------------------------! ! Nest ratios. These values are used by both GRID_TYPE=0 and GRID_TYPE=1. ! ! NSTRATX -- this is will divide the values given by DELTAX or GRID_RES for the ! ! nested grids. The first value should be always one. ! ! NSTRATY -- this is will divide the values given by DELTAY or GRID_RES for the ! ! nested grids. The first value should be always one, and this must ! ! be always the same as NSTRATX when GRID_TYPE = 0, and this is also ! ! strongly recommended for when GRID_TYPE = 1. ! !------------------------------------------------------------------------------------! NL%NSTRATX = 1,4 NL%NSTRATY = 1,4 !------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are used to define single polygon of interest runs, and ! ! they are ignored when N_ED_REGION = 0. ! ! ! ! N_POI -- number of polygons of interest (POIs). This can be zero as long as ! ! N_ED_REGION is not. ! ! POI_LAT -- list of latitudes of each POI. ! ! POI_LON -- list of longitudes of each POI. ! ! POI_RES -- grid resolution of each POI (degrees). This is used only to define the ! ! soil types ! !---------------------------------------------------------------------------------------! NL%N_POI = 1 NL%POI_LAT = -3.018 NL%POI_LON = -54.971 NL%POI_RES = 1.00 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! LOADMETH -- Load balancing method. This is used only in regional runs run in ! ! parallel. ! ! 0. Let ED decide the best way of splitting the polygons. Commonest ! ! option and default. ! ! 1. One of the methods to split polygons based on their previous ! ! work load. Developpers only. ! ! 2. Try to load an equal number of SITES per node. Useful for when ! ! total number of polygon is the same as the total number of cores. ! ! 3. Another method to split polygons based on their previous work load. ! ! Developpers only. ! !---------------------------------------------------------------------------------------! NL%LOADMETH = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ED2 File output. For all the variables 0 means no output and 3 means HDF5 output. ! ! ! ! IFOUTPUT -- Fast analysis. These are mostly polygon-level averages, and the time ! ! interval between files is determined by FRQANL ! ! IDOUTPUT -- Daily means (one file per day) ! ! IMOUTPUT -- Monthly means (one file per month) ! ! IQOUTPUT -- Monthly means of the diurnal cycle (one file per month). The number ! ! of points for the diurnal cycle is 86400 / FRQANL ! ! IYOUTPUT -- Annual output. ! ! ITOUTPUT -- Instantaneous fluxes, mostly polygon-level variables, one file per year. ! ! ISOUTPUT -- restart file, for HISTORY runs. The time interval between files is ! ! determined by FRQHIS ! !---------------------------------------------------------------------------------------! NL%IFOUTPUT = 0 NL%IDOUTPUT = 0 NL%IMOUTPUT = 0 NL%IQOUTPUT = 3 NL%IYOUTPUT = 0 NL%ITOUTPUT = 0 NL%ISOUTPUT = 3 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ATTACH_METADATA -- Flag for attaching metadata to HDF datasets. Attaching metadata ! ! will aid new users in quickly identifying dataset descriptions but ! ! will compromise I/O performance significantly. ! ! 0 = no metadata, 1 = attach metadata ! !---------------------------------------------------------------------------------------! NL%ATTACH_METADATA = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! UNITFAST -- The following variables control the units for FRQFAST/OUTFAST, and ! ! UNITSTATE FRQSTATE/OUTSTATE, respectively. Possible values are: ! ! 0. Seconds; ! ! 1. Days; ! ! 2. Calendar months (variable) ! ! 3. Calendar years (variable) ! ! ! ! N.B.: 1. In case OUTFAST/OUTSTATE are set to special flags (-1 or -2) ! ! UNITFAST/UNITSTATE will be ignored for them. ! ! 2. In case IQOUTPUT is set to 3, then UNITFAST has to be 0. ! ! ! !---------------------------------------------------------------------------------------! NL%UNITFAST = 0 NL%UNITSTATE = 3 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! OUTFAST/OUTSTATE -- these control the number of times per file. ! ! 0. Each time gets its own file ! ! -1. One file per day ! ! -2. One file per month ! ! > 0. Multiple timepoints can be recorded to a single file reducing ! ! the number of files and i/o time in post-processing. ! ! Multiple timepoints should not be used in the history files ! ! if you intend to use these for HISTORY runs. ! !---------------------------------------------------------------------------------------! NL%OUTFAST = 1. NL%OUTSTATE = 1. !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ICLOBBER -- What to do in case the model finds a file that it was supposed the ! ! written? 0 = stop the run, 1 = overwrite without warning. ! ! FRQFAST -- time interval between analysis files, units defined by UNITFAST. ! ! FRQSTATE -- time interval between history files, units defined by UNITSTATE. ! !---------------------------------------------------------------------------------------! NL%ICLOBBER = 1 NL%FRQFAST = 3600. NL%FRQSTATE = 1. !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! FFILOUT -- Path and prefix for analysis files (all but history/restart). ! ! SFILOUT -- Path and prefix for history files. ! !---------------------------------------------------------------------------------------! NL%FFILOUT = '/data/testrun.s83/analy/ts83' NL%SFILOUT = '/data/testrun.s83/histo/ts83' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IED_INIT_MODE -- This controls how the plant community and soil carbon pools are ! ! initialised. ! ! ! ! -1. Start from a true bare ground run, or an absolute desert run. This will ! ! never grow any plant. ! ! 0. Start from near-bare ground (only a few seedlings from each PFT to be included ! ! in this run). ! ! 1. This will use history files written by ED-1.0. It will grab the ecosystem ! ! state (like biomass, LAI, plant density, etc.), but it will start the ! ! thermodynamic state as a new simulation. ! ! 2. Same as 1, but it uses history files from ED-2.0 without multiple sites, and ! ! with the old PFT numbers. ! ! 3. Same as 1, but using history files from ED-2.0 with multiple sites and ! ! TOPMODEL hydrology. ! ! 4. Same as 1, but using ED2.1 H5 history/state files that take the form: ! ! 'dir/prefix-gxx.h5' ! ! Initialization files MUST end with -gxx.h5 where xx is a two digit integer ! ! grid number. Each grid has its own initialization file. As an example, if a ! ! user has two files to initialize their grids with: ! ! example_file_init-g01.h5 and example_file_init-g02.h5 ! ! NL%SFILIN = 'example_file_init' ! ! ! ! 5. This is similar to option 4, except that you may provide several files ! ! (including a mix of regional and POI runs, each file ending at a different ! ! date). This will not check date nor grid structure, it will simply read all ! ! polygons and match the nearest neighbour to each polygon of your run. SFILIN ! ! must have the directory common to all history files that are sought to be used,! ! up to the last character the files have in common. For example if your files ! ! are ! ! /mypath/P0001-S-2000-01-01-000000-g01.h5, ! ! /mypath/P0002-S-1966-01-01-000000-g02.h5, ! ! ... ! ! /mypath/P1000-S-1687-01-01-000000-g01.h5: ! ! NL%SFILIN = '/mypath/P' ! ! ! ! 6 - Initialize with ED-2 style files without multiple sites, exactly like option ! ! 2, except that the PFT types are preserved. ! !---------------------------------------------------------------------------------------! NL%IED_INIT_MODE = 6 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! EDRES -- Expected input resolution for ED2.0 files. This is not used unless ! ! IED_INIT_MODE = 3. ! !---------------------------------------------------------------------------------------! NL%EDRES = 1.0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! SFILIN -- The meaning and the size of this variable depends on the type of run, set ! ! at variable NL%RUNTYPE. ! ! ! ! 1. INITIAL. Then this is the path+prefix of the previous ecosystem state. This has ! ! dimension of the number of grids so you can initialize each grid with a ! ! different dataset. In case only one path+prefix is given, the same will ! ! be used for every grid. Only some ecosystem variables will be set up ! ! here, and the initial condition will be in thermodynamic equilibrium. ! ! ! ! 2. HISTORY. This is the path+prefix of the history file that will be used. Only the ! ! path+prefix will be used, as the history for every grid must have come ! ! from the same simulation. ! !---------------------------------------------------------------------------------------! NL%SFILIN = '/data/sites/Santarem_Km83/s83_default.' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! History file information. These variables are used to continue a simulation from ! ! a point other than the beginning. Time must be in UTC. ! ! ! ! IMONTHH -- the time of the history file. This is the only place you need to change ! ! IDATEH dates for a HISTORY run. You may change IMONTHZ and related in case you ! ! IYEARH want to extend the run, but yo should NOT change IMONTHA and related. ! ! ITIMEH ! !---------------------------------------------------------------------------------------! NL%ITIMEH = 0000 NL%IDATEH = 01 NL%IMONTHH = 05 NL%IYEARH = 2001 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! NZG - number of soil layers. One value for all grids. ! ! NZS - maximum number of snow/water pounding layers. This is used only for ! ! snow, if only liquid water is standing, the water will be all collapsed ! ! into a single layer, so if you are running for places where it doesn't snow ! ! a lot, leave this set to 1. One value for all grids. ! !---------------------------------------------------------------------------------------! NL%NZG = 16 NL%NZS = 4 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISOILFLG -- this controls which soil type input you want to use. ! ! 1. Read in from a dataset I will provide in the SOIL_DATABASE variable a ! ! few lines below. ! ! below. ! ! 2. No data available, I will use constant values I will provide in ! ! NSLCON or by prescribing the fraction of sand and clay (see SLXSAND ! ! and SLXCLAY). ! !---------------------------------------------------------------------------------------! NL%ISOILFLG = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! NSLCON -- ED-2 Soil classes that the model will use when ISOILFLG is set to 2. ! ! Possible values are: ! !---------------------------------------------------------------------------------------! ! 1 -- sand | 7 -- silty clay loam | 13 -- bedrock ! ! 2 -- loamy sand | 8 -- clayey loam | 14 -- silt ! ! 3 -- sandy loam | 9 -- sandy clay | 15 -- heavy clay ! ! 4 -- silt loam | 10 -- silty clay | 16 -- clayey sand ! ! 5 -- loam | 11 -- clay | 17 -- clayey silt ! ! 6 -- sandy clay loam | 12 -- peat ! !---------------------------------------------------------------------------------------! NL%NSLCON = 11 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISOILCOL -- LEAF-3 and ED-2 soil colour classes that the model will use when ISOILFLG ! ! is set to 2. Soil classes are from 1 to 20 (1 = lightest; 20 = darkest). ! ! The values are the same as CLM-4.0. The table is the albedo for visible ! ! and near infra-red. ! !---------------------------------------------------------------------------------------! ! ! ! |-----------------------------------------------------------------------| ! ! | | Dry soil | Saturated | | Dry soil | Saturated | ! ! | Class |-------------+-------------| Class +-------------+-------------| ! ! | | VIS | NIR | VIS | NIR | | VIS | NIR | VIS | NIR | ! ! |-------+------+------+------+------+-------+------+------+------+------| ! ! | 1 | 0.36 | 0.61 | 0.25 | 0.50 | 11 | 0.24 | 0.37 | 0.13 | 0.26 | ! ! | 2 | 0.34 | 0.57 | 0.23 | 0.46 | 12 | 0.23 | 0.35 | 0.12 | 0.24 | ! ! | 3 | 0.32 | 0.53 | 0.21 | 0.42 | 13 | 0.22 | 0.33 | 0.11 | 0.22 | ! ! | 4 | 0.31 | 0.51 | 0.20 | 0.40 | 14 | 0.20 | 0.31 | 0.10 | 0.20 | ! ! | 5 | 0.30 | 0.49 | 0.19 | 0.38 | 15 | 0.18 | 0.29 | 0.09 | 0.18 | ! ! | 6 | 0.29 | 0.48 | 0.18 | 0.36 | 16 | 0.16 | 0.27 | 0.08 | 0.16 | ! ! | 7 | 0.28 | 0.45 | 0.17 | 0.34 | 17 | 0.14 | 0.25 | 0.07 | 0.14 | ! ! | 8 | 0.27 | 0.43 | 0.16 | 0.32 | 18 | 0.12 | 0.23 | 0.06 | 0.12 | ! ! | 9 | 0.26 | 0.41 | 0.15 | 0.30 | 19 | 0.10 | 0.21 | 0.05 | 0.10 | ! ! | 10 | 0.25 | 0.39 | 0.14 | 0.28 | 20 | 0.08 | 0.16 | 0.04 | 0.08 | ! ! |-----------------------------------------------------------------------| ! ! ! ! Soil type 21 is a special case in which we use the albedo method that used to be ! ! the default in ED-2.1. ! !---------------------------------------------------------------------------------------! NL%ISOILCOL = 21 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! These variables are used to define the soil properties when you don't want to use ! ! the standard soil classes. ! ! ! ! SLXCLAY -- Prescribed fraction of clay [0-1] ! ! SLXSAND -- Prescribed fraction of sand [0-1]. ! ! ! ! They are used only when ISOILFLG is 2, both values are between 0. and 1., and ! ! theira sum doesn't exceed 1. Otherwise standard ED values will be used instead. ! !---------------------------------------------------------------------------------------! NL%SLXCLAY = 0.59 NL%SLXSAND = 0.39 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Soil grid and initial conditions if no file is provided: ! ! ! ! SLZ - soil depth in m. Values must be negative and go from the deepest layer to ! ! the top. ! ! SLMSTR - this is the initial soil moisture, now given as the soil moisture index. ! ! Values can be fraction, in which case they will be linearly interpolated ! ! between the special points (e.g. 0.5 will put soil moisture half way ! ! between the wilting point and field capacity). ! ! -1 = dry air soil moisture ! ! 0 = wilting point ! ! 1 = field capacity ! ! 2 = porosity (saturation) ! ! STGOFF - initial temperature offset (soil temperature = air temperature + offset) ! !---------------------------------------------------------------------------------------! NL%SLZ = -8.000, -6.959, -5.995, -5.108, -4.296, -3.560, -2.897, -2.307, -1.789, -1.340, -0.961, -0.648, -0.400, -0.215, -0.089, -0.020 NL%SLMSTR = 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00 NL%STGOFF = 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Input databases ! ! VEG_DATABASE -- vegetation database, used only to determine the land/water mask. ! ! Fill with the path and the prefix. ! ! SOIL_DATABASE -- soil database, used to determine the soil type. Fill with the ! ! path and the prefix. ! ! LU_DATABASE -- land-use change disturbance rates database, used only when ! ! IANTH_DISTURB is set to 1. Fill with the path and the prefix. ! ! PLANTATION_FILE -- plantation fraction file. In case you don't have such a file or ! ! you do not want to use it, you must leave this variable empty: ! ! (NL%PLANTATION_FILE = '' ! ! THSUMS_DATABASE -- input directory with dataset to initialise chilling degrees and ! ! growing degree days, which is used to drive the cold-deciduous ! ! phenology (you must always provide this, even when your PFTs are ! ! not cold deciduous). ! ! ED_MET_DRIVER_DB -- File containing information for meteorological driver ! ! instructions (the "header" file). ! ! SOILSTATE_DB -- Dataset in case you want to provide the initial conditions of ! ! soil temperature and moisture. ! ! SOILDEPTH_DB -- Dataset in case you want to read in soil depth information. ! !---------------------------------------------------------------------------------------! NL%VEG_DATABASE = '/data/oge2OLD/OGE2_' NL%SOIL_DATABASE = '/data/faoOLD/FAO_' NL%LU_DATABASE = '/data/ed_inputs/glu/' NL%PLANTATION_FILE = '' NL%THSUMS_DATABASE = '/data/ed_inputs/' NL%ED_MET_DRIVER_DB = '/data/sites/Santarem_Km83/ED_MET_DRIVER_HEADER' NL%SOILSTATE_DB = '' NL%SOILDEPTH_DB = '' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISOILSTATEINIT -- Variable controlling how to initialise the soil temperature and ! ! moisture ! ! 0. Use SLMSTR and STGOFF. ! ! 1. Read from SOILSTATE_DB. ! ! ISOILDEPTHFLG -- Variable controlling how to initialise soil depth ! ! 0. Constant, always defined by the first SLZ layer. ! ! 1. Read from SOILDEPTH_DB. ! !---------------------------------------------------------------------------------------! NL%ISOILSTATEINIT = 0 NL%ISOILDEPTHFLG = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISOILBC -- This controls the soil moisture boundary condition at the bottom. If ! ! unsure, use 0 for short-term simulations (couple of days), and 1 for long- ! ! -term simulations (months to years). ! ! 0. Bedrock. Flux from the bottom of the bottommost layer is set to 0. ! ! 1. Gravitational flow. The flux from the bottom of the bottommost layer ! ! is due to gradient of height only. ! ! 2. Super drainage. Soil moisture of the ficticious layer beneath the ! ! bottom is always at dry air soil moisture. ! ! 3. Half-way. Assume that the fictious layer beneath the bottom is always ! ! at field capacity. ! ! 4. Aquifer. Soil moisture of the ficticious layer beneath the bottom is ! ! always at saturation. ! !---------------------------------------------------------------------------------------! NL%ISOILBC = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IVEGT_DYNAMICS -- The vegetation dynamics scheme. ! ! 0. No vegetation dynamics, the initial state will be preserved, ! ! even though the model will compute the potential values. This ! ! option is useful for theoretical simulations only. ! ! 1. Normal ED vegetation dynamics (Moorcroft et al 2001). ! ! The normal option for almost any simulation. ! !---------------------------------------------------------------------------------------! NL%IVEGT_DYNAMICS = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IBIGLEAF -- Do you want to run ED as a 'big leaf' model? ! ! 0. No, use the standard size- and age-structure (Moorcroft et al. 2001) ! ! This is the recommended method for most applications. ! ! 1. 'big leaf' ED: this will have no horizontal or vertical hetero- ! ! geneities; 1 patch per PFT and 1 cohort per patch; no vertical ! ! growth, recruits will 'appear' instantaneously at maximum height. ! ! ! ! N.B. if you set IBIGLEAF to 1, you MUST turn off the crown model (CROWN_MOD = 0) ! !---------------------------------------------------------------------------------------! NL%IBIGLEAF = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! INTEGRATION_SCHEME -- The biophysics integration scheme. ! ! 0. Euler step. The fastest, but it doesn't estimate ! ! errors. ! ! 1. Fourth-order Runge-Kutta method. ED-2.1 default method ! ! 2. Heun's method (a second-order Runge-Kutta). ! ! 3. Hybrid Stepping (BDF2 implicit step for the canopy air and ! ! leaf temp, forward Euler for else, under development). ! !---------------------------------------------------------------------------------------! NL%INTEGRATION_SCHEME = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! RK4_TOLERANCE -- This is the relative tolerance for Runge-Kutta or Heun's ! ! integration. Larger numbers will make runs go faster, at the ! ! expense of being less accurate. Currently the valid range is ! ! between 1.e-7 and 1.e-1, but recommended values are between 1.e-4 ! ! and 1.e-2. ! !---------------------------------------------------------------------------------------! NL%RK4_TOLERANCE = 0.01 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IBRANCH_THERMO -- This determines whether branches should be included in the ! ! vegetation thermodynamics and radiation or not. ! ! 0. No branches in energy/radiation (ED-2.1 default); ! ! 1. Branches are accounted in the energy and radiation. Branchwood ! ! and leaf are treated separately in the canopy radiation scheme, ! ! but solved as a single pool in the biophysics integration. ! ! 2. Similar to 1, but branches are treated as separate pools in the ! ! biophysics (thus doubling the number of prognostic variables). ! !---------------------------------------------------------------------------------------! NL%IBRANCH_THERMO = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IPHYSIOL -- This variable will determine the functional form that will control how ! ! the various parameters will vary with temperature, and how the CO2 ! ! compensation point for gross photosynthesis (Gamma*) will be found. ! ! Options are: ! ! ! ! 0 -- Original ED-2.1, we use the "Arrhenius" function as in Foley et al. (1996) and ! ! Moorcroft et al. (2001). Gamma* is found using the parameters for tau as in ! ! Foley et al. (1996). ! ! 1 -- Modified ED-2.1. In this case Gamma* is found using the Michaelis-Mentel ! ! coefficients for CO2 and O2, as in Farquhar et al. (1980) and in CLM. ! ! 2 -- Collatz et al. (1991). We use the power (Q10) equations, with Collatz et al. ! ! parameters for compensation point, and the Michaelis-Mentel coefficients. The ! ! correction for high and low temperatures are the same as in Moorcroft et al. ! ! (2001). ! ! 3 -- Same as 2, except that we find Gamma* as in Farquhar et al. (1980) and in CLM. ! !---------------------------------------------------------------------------------------! NL%IPHYSIOL = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IALLOM -- Which allometry to use (this mostly affects tropical PFTs. Temperate PFTs ! ! will use the new root allometry and the maximum crown area if IALLOM is set ! ! to 1 or 2). ! ! 0. Original ED-2.1 ! ! 1. a. The coefficients for structural biomass are set so the total AGB ! ! is similar to Baker et al. (2004), equation 2. Balive is the ! ! default ED-2.1; ! ! b. Experimental root depth that makes canopy trees to have root depths ! ! of 5m and grasses/seedlings at 0.5 to have root depth of 0.5 m. ! ! c. Crown area defined as in Poorter et al. (2006), imposing maximum ! ! crown area ! ! 2. Similar to 1, but with a few extra changes. ! ! a. Height -> DBH allometry as in Poorter et al. (2006) ! ! b. Balive is retuned, using a few leaf biomass allometric equations for ! ! a few genuses in Costa Rica. References: ! ! Cole and Ewel (2006), and Calvo Alvarado et al. (2008). ! !---------------------------------------------------------------------------------------! NL%IALLOM = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IGRASS -- This controls the dynamics and growth calculation for grasses. A new ! ! grass scheme is now available where bdead = 0, height is a function of bleaf! ! and growth happens daily. ALS (3/3/12) ! ! 0: grasses behave like trees as in ED2.1 (old scheme) ! ! ! ! 1: new grass scheme as described above ! !---------------------------------------------------------------------------------------! NL%IGRASS = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IPHEN_SCHEME -- It controls the phenology scheme. Even within each scheme, the ! ! actual phenology will be different depending on the PFT. ! ! ! ! -1: grasses - evergreen; ! ! tropical - evergreen; ! ! conifers - evergreen; ! ! hardwoods - cold-deciduous (Botta et al.); ! ! ! ! 0: grasses - drought-deciduous (old scheme); ! ! tropical - drought-deciduous (old scheme); ! ! conifers - evergreen; ! ! hardwoods - cold-deciduous; ! ! ! ! 1: prescribed phenology ! ! ! ! 2: grasses - drought-deciduous (new scheme); ! ! tropical - drought-deciduous (new scheme); ! ! conifers - evergreen; ! ! hardwoods - cold-deciduous; ! ! ! ! 3: grasses - drought-deciduous (new scheme); ! ! tropical - drought-deciduous (light phenology); ! ! conifers - evergreen; ! ! hardwoods - cold-deciduous; ! ! ! ! Old scheme: plants shed their leaves once instantaneous amount of available water ! ! becomes less than a critical value. ! ! New scheme: plants shed their leaves once a 10-day running average of available ! ! water becomes less than a critical value. ! !---------------------------------------------------------------------------------------! NL%IPHEN_SCHEME = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Parameters that control the phenology response to radiation, used only when ! ! IPHEN_SCHEME = 3. ! ! ! ! RADINT -- Intercept ! ! RADSLP -- Slope. ! !---------------------------------------------------------------------------------------! NL%RADINT = -11.3868 NL%RADSLP = 0.0824 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! REPRO_SCHEME -- This controls plant reproduction and dispersal. ! ! 0. Reproduction off. Useful for very short runs only. ! ! 1. Original reproduction scheme. Seeds are exchanged between ! ! patches belonging to the same site, but they can't go outside ! ! their original site. ! ! 2. Similar to 1, but seeds are exchanged between patches belonging ! ! to the same polygon, even if they are in different sites. They ! ! can't go outside their original polygon, though. This is the ! ! same as option 1 if there is only one site per polygon. ! ! 3. Similar to 2, but recruits will only be formed if their phenology ! ! status would be "leaves fully flushed". This only matters for ! ! drought deciduous plants. This option is for testing purposes ! ! only, think 50 times before using it... ! !---------------------------------------------------------------------------------------! NL%REPRO_SCHEME = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! LAPSE_SCHEME -- This specifies the met lapse rate scheme: ! ! 0. No lapse rates ! ! 1. phenomenological, global ! ! 2. phenomenological, local (not yet implemented) ! ! 3. mechanistic(not yet implemented) ! !---------------------------------------------------------------------------------------! NL%LAPSE_SCHEME = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! CROWN_MOD -- Specifies how tree crowns are represent in the canopy radiation model, ! ! and in the turbulence scheme depending on ICANTURB. ! ! 0. ED1 default, crowns are evenly spread throughout the patch area, and ! ! cohorts are stacked on the top of each other. ! ! 1. Dietze (2008) model. Cohorts have a finite radius, and cohorts are ! ! stacked on the top of each other. ! !---------------------------------------------------------------------------------------! NL%CROWN_MOD = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the canopy radiation solver. ! ! ! ! ICANRAD -- Specifies how canopy radiation is solved. This variable sets both ! ! shortwave and longwave. ! ! 0. Two-stream model (Medvigy 2006), with the possibility to apply ! ! finite crown area to direct shortwave radiation. ! ! 1. Multiple-scattering model (Zhao and Qualls 2005,2006), with the ! ! possibility to apply finite crown area to all radiation fluxes. ! ! LTRANS_VIS -- Leaf transmittance for tropical plants - Visible/PAR ! ! LTRANS_NIR -- Leaf transmittance for tropical plants - Near Infrared ! ! LREFLECT_VIS -- Leaf reflectance for tropical plants - Visible/PAR ! ! LREFLECT_NIR -- Leaf reflectance for tropical plants - Near Infrared ! ! ORIENT_TREE -- Leaf orientation factor for tropical trees. Extremes are: ! ! -1. All leaves are oriented in the vertical ! ! 0. Leaf orientation is perfectly random ! ! 1. All leaves are oriented in the horizontal ! ! In practice, acceptable values range from -0.4 and 0.6 ! ! ORIENT_GRASS -- Leaf orientation factor for tropical grasses. Extremes are: ! ! -1. All leaves are oriented in the vertical ! ! 0. Leaf orientation is perfectly random ! ! 1. All leaves are oriented in the horizontal ! ! In practice, acceptable values range from -0.4 and 0.6 ! ! CLUMP_TREE -- Clumping factor for tropical trees. Extremes are: ! ! lim -> 0. Black hole (0 itself is unacceptable) ! ! 1. Homogeneously spread over the layer (i.e., no clumping) ! ! CLUMP_GRASS -- Clumping factor for tropical grasses. Extremes are: ! ! lim -> 0. Black hole (0 itself is unacceptable) ! ! 1. Homogeneously spread over the layer (i.e., no clumping) ! !---------------------------------------------------------------------------------------! NL%ICANRAD = 0 NL%LTRANS_VIS = 0.050 NL%LTRANS_NIR = 0.230 NL%LREFLECT_VIS = 0.100 NL%LREFLECT_NIR = 0.460 NL%ORIENT_TREE = 0.100 NL%ORIENT_GRASS = 0.000 NL%CLUMP_TREE = 0.800 NL%CLUMP_GRASS = 1.000 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! DECOMP_SCHEME -- This specifies the dependence of soil decomposition on temperature. ! ! 0. ED-2.0 default, the original exponential ! ! 1. Lloyd and Taylor (1994) model ! ! [[option 1 requires parameters to be set in xml]] ! !---------------------------------------------------------------------------------------! NL%DECOMP_SCHEME = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! H2O_PLANT_LIM -- this determines whether plant photosynthesis can be limited by ! ! soil moisture, the FSW, defined as FSW = Supply / (Demand + Supply). ! ! ! ! Demand is always the transpiration rates in case soil moisture is ! ! not limiting (the psi_0 term times LAI). The supply is determined ! ! by Kw * nplant * Broot * Available_Water, and the definition of ! ! available water changes depending on H2O_PLANT_LIM: ! ! 0. Force FSW = 1 (effectively available water is infinity). ! ! 1. Available water is the total soil water above wilting point, ! ! integrated across all layers within the rooting zone. ! ! 2. Available water is the soil water at field capacity minus ! ! wilting point, scaled by the so-called wilting factor: ! ! (psi(k) - (H - z(k)) - psi_wp) / (psi_fc - psi_wp) ! ! where psi is the matric potentital at layer k, z is the layer ! ! depth, H it the crown height and psi_fc and psi_wp are the ! ! matric potentials at wilting point and field capacity. ! !---------------------------------------------------------------------------------------! NL%H2O_PLANT_LIM = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are factors that control photosynthesis and respiration. ! ! Notice that some of them are relative values whereas others are absolute. ! ! ! ! VMFACT_C3 -- Factor multiplying the default Vm0 for C3 plants (1.0 = default). ! ! VMFACT_C4 -- Factor multiplying the default Vm0 for C4 plants (1.0 = default). ! ! MPHOTO_TRC3 -- Stomatal slope (M) for tropical C3 plants ! ! MPHOTO_TEC3 -- Stomatal slope (M) for conifers and temperate C3 plants ! ! MPHOTO_C4 -- Stomatal slope (M) for C4 plants. ! ! BPHOTO_BLC3 -- cuticular conductance for broadleaf C3 plants [umol/m2/s] ! ! BPHOTO_NLC3 -- cuticular conductance for needleleaf C3 plants [umol/m2/s] ! ! BPHOTO_C4 -- cuticular conductance for C4 plants [umol/m2/s] ! ! KW_GRASS -- Water conductance for trees, in m2/yr/kgC_root. This is used only ! ! when H2O_PLANT_LIM is not 0. ! ! KW_TREE -- Water conductance for grasses, in m2/yr/kgC_root. This is used only ! ! when H2O_PLANT_LIM is not 0. ! ! GAMMA_C3 -- The dark respiration factor (gamma) for C3 plants. Subtropical ! ! conifers will be scaled by GAMMA_C3 * 0.028 / 0.02 ! ! GAMMA_C4 -- The dark respiration factor (gamma) for C4 plants. ! ! D0_GRASS -- The transpiration control in gsw (D0) for ALL grasses. ! ! D0_TREE -- The transpiration control in gsw (D0) for ALL trees. ! ! ALPHA_C3 -- Quantum yield of ALL C3 plants. This is only applied when ! ! QUANTUM_EFFICIENCY_T = 0. ! ! ALPHA_C4 -- Quantum yield of C4 plants. This is always applied. ! ! KLOWCO2IN -- The coefficient that controls the PEP carboxylase limited rate of ! ! carboxylation for C4 plants. ! ! RRFFACT -- Factor multiplying the root respiration factor for ALL PFTs. ! ! (1.0 = default). ! ! GROWTHRESP -- The actual growth respiration factor (C3/C4 tropical PFTs only). ! ! (1.0 = default). ! ! LWIDTH_GRASS -- Leaf width for grasses, in metres. This controls the leaf boundary ! ! layer conductance (gbh and gbw). ! ! LWIDTH_BLTREE -- Leaf width for trees, in metres. This controls the leaf boundary ! ! layer conductance (gbh and gbw). This is applied to broadleaf trees ! ! only. ! ! LWIDTH_NLTREE -- Leaf width for trees, in metres. This controls the leaf boundary ! ! layer conductance (gbh and gbw). This is applied to conifer trees ! ! only. ! ! Q10_C3 -- Q10 factor for C3 plants (used only if IPHYSIOL is set to 2 or 3). ! ! Q10_C4 -- Q10 factor for C4 plants (used only if IPHYSIOL is set to 2 or 3). ! !---------------------------------------------------------------------------------------! NL%VMFACT_C3 = 1 NL%VMFACT_C4 = 1 NL%MPHOTO_TRC3 = 9 NL%MPHOTO_TEC3 = 7.2 NL%MPHOTO_C4 = 5.2 NL%BPHOTO_BLC3 = 10000 NL%BPHOTO_NLC3 = 1000 NL%BPHOTO_C4 = 10000 NL%KW_GRASS = 900 NL%KW_TREE = 600 NL%GAMMA_C3 = 0.0145 NL%GAMMA_C4 = 0.035 NL%D0_GRASS = 0.016 NL%D0_TREE = 0.016 NL%ALPHA_C3 = 0.08 NL%ALPHA_C4 = 0.055 NL%KLOWCO2IN = 4000 NL%RRFFACT = 1 NL%GROWTHRESP = 0.333 NL%LWIDTH_GRASS = 0.05 NL%LWIDTH_BLTREE = 0.1 NL%LWIDTH_NLTREE = 0.05 NL%Q10_C3 = 2.4 NL%Q10_C4 = 2.4 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! THETACRIT -- Leaf drought phenology threshold. The sign matters here: ! ! >= 0. -- This is the relative soil moisture above the wilting point ! ! below which the drought-deciduous plants will start shedding ! ! their leaves ! ! < 0. -- This is the soil potential in MPa below which the drought- ! ! -deciduous plants will start shedding their leaves. The wilt- ! ! ing point is by definition -1.5MPa, so make sure that the value ! ! is above -1.5. ! !---------------------------------------------------------------------------------------! NL%THETACRIT = -1.20 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! QUANTUM_EFFICIENCY_T -- Which quantum yield model should to use for C3 plants ! ! 0. Original ED-2.1, quantum efficiency is constant. ! ! 1. Quantum efficiency varies with temperature following ! ! Ehleringer (1978) polynomial fit. ! !---------------------------------------------------------------------------------------! NL%QUANTUM_EFFICIENCY_T = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! N_PLANT_LIM -- This controls whether plant photosynthesis can be limited by nitrogen. ! ! 0. No limitation ! ! 1. ED-2.1 nitrogen limitation model. ! !---------------------------------------------------------------------------------------! NL%N_PLANT_LIM = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! N_DECOMP_LIM -- This controls whether decomposition can be limited by nitrogen. ! ! 0. No limitation ! ! 1. ED-2.1 nitrogen limitation model. ! !---------------------------------------------------------------------------------------! NL%N_DECOMP_LIM = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following parameters adjust the fire disturbance in the model. ! ! INCLUDE_FIRE -- Which threshold to use for fires. ! ! 0. No fires; ! ! 1. (deprecated) Fire will be triggered with enough biomass and ! ! integrated ground water depth less than a threshold. Based on ! ! ED-1, the threshold assumes that the soil is 1 m, so deeper ! ! soils will need to be much drier to allow fires to happen and ! ! often will never allow fires. ! ! 2. Fire will be triggered with enough biomass and the total soil ! ! water at the top 75 cm falls below a threshold. ! ! FIRE_PARAMETER -- If fire happens, this will control the intensity of the disturbance ! ! given the amount of fuel (currently the total above-ground ! ! biomass). ! ! SM_FIRE -- This is used only when INCLUDE_FIRE = 2. The sign here matters. ! ! >= 0. - Minimum relative soil moisture above dry air of the top 1m ! ! that will prevent fires to happen. ! ! < 0. - Minimum mean soil moisture potential in MPa of the top 1m ! ! that will prevent fires to happen. The dry air soil ! ! potential is defined as -3.1 MPa, so make sure SM_FIRE is ! ! greater than this value. ! !---------------------------------------------------------------------------------------! NL%INCLUDE_FIRE = 0 NL%FIRE_PARAMETER = 0.5 NL%SM_FIRE = -1.40 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IANTH_DISTURB -- This flag controls whether to include anthropogenic disturbances ! ! such as land clearing, abandonment, and logging. ! ! 0. no anthropogenic disturbance. ! ! 1. use anthropogenic disturbance dataset. ! !---------------------------------------------------------------------------------------! NL%IANTH_DISTURB = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ICANTURB -- This flag controls the canopy roughness. ! ! 0. Based on Leuning et al. (1995), wind is computed using the similarity ! ! theory for the top cohort, and they are extinguished with cumulative ! ! LAI. If using CROWN_MOD 1 or 2, this will use local LAI and average ! ! by crown area. ! ! 1. The default ED-2.1 scheme, except that it uses the zero-plane ! ! displacement height. ! ! 2. This uses the method of Massman (1997) using constant drag and no ! ! sheltering factor. ! ! 3. This is also based on Massman (1997), but with the option of varying ! ! the drag and sheltering within the canopy. ! ! 4. Same as 0, but if finds the ground conductance following CLM ! ! technical note (equations 5.98-5.100). ! !---------------------------------------------------------------------------------------! NL%ICANTURB = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISFCLYRM -- Similarity theory model. The model that computes u*, T*, etc... ! ! 1. BRAMS default, based on Louis (1979). It uses empirical relations to ! ! estimate the flux based on the bulk Richardson number ! ! ! ! All models below use an interative method to find z/L, and the only change ! ! is the functional form of the psi functions. ! ! ! ! 2. Oncley and Dudhia (1995) model, based on MM5. ! ! 3. Beljaars and Holtslag (1991) model. Similar to 2, but it uses an alternative ! ! method for the stable case that mixes more than the OD95. ! ! 4. CLM (2004). Similar to 2 and 3, but they have special functions to deal with ! ! very stable and very stable cases. ! !---------------------------------------------------------------------------------------! NL%ISFCLYRM = 3 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IED_GRNDVAP -- Methods to find the ground -> canopy conductance. ! ! 0. Modified Lee Pielke (1992), adding field capacity, but using beta factor ! ! without the square, like in Noilhan and Planton (1989). This is the closest ! ! to the original ED-2.0 and LEAF-3, and it is also the recommended one. ! ! 1. Test # 1 of Mahfouf and Noilhan (1991) ! ! 2. Test # 2 of Mahfouf and Noilhan (1991) ! ! 3. Test # 3 of Mahfouf and Noilhan (1991) ! ! 4. Test # 4 of Mahfouf and Noilhan (1991) ! ! 5. Combination of test #1 (alpha) and test #2 (soil resistance). ! ! In all cases the beta term is modified so it approaches zero as soil moisture goes ! ! to dry air soil. ! !---------------------------------------------------------------------------------------! NL%IED_GRNDVAP = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are used to control the similarity theory model. For the ! ! meaning of these parameters, check Beljaars and Holtslag (1991). ! ! GAMM -- gamma coefficient for momentum, unstable case (dimensionless) ! ! Ignored when ISTAR = 1 ! ! GAMH -- gamma coefficient for heat, unstable case (dimensionless) ! ! Ignored when ISTAR = 1 ! ! TPRANDTL -- Turbulent Prandtl number ! ! Ignored when ISTAR = 1 ! ! RIBMAX -- maximum bulk Richardson number. ! ! LEAF_MAXWHC -- Maximum water that can be intercepted by leaves, in kg/m2leaf. ! !---------------------------------------------------------------------------------------! NL%GAMM = 13.0 NL%GAMH = 13.0 NL%TPRANDTL = 0.74 NL%RIBMAX = 0.50 NL%LEAF_MAXWHC = 0.11 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IPERCOL -- This controls percolation and infiltration. ! ! 0. Default method. Assumes soil conductivity constant and for the ! ! temporary surface water, it sheds liquid in excess of a 1:9 liquid- ! ! -to-ice ratio through percolation. Temporary surface water exists ! ! only if the top soil layer is at saturation. ! ! 1. Constant soil conductivity, and it uses the percolation model as in ! ! Anderson (1976) NOAA technical report NWS 19. Temporary surface ! ! water may exist after a heavy rain event, even if the soil doesn't ! ! saturate. Recommended value. ! ! 2. Soil conductivity decreases with depth even for constant soil moisture ! ! , otherwise it is the same as 1. ! !---------------------------------------------------------------------------------------! NL%IPERCOL = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the plant functional types (PFTs) that will be ! ! used in this simulation. ! ! ! ! INCLUDE_THESE_PFT -- a list containing all the PFTs you want to include in this run ! ! AGRI_STOCK -- which PFT should be used for agriculture ! ! (used only when IANTH_DISTURB = 1) ! ! PLANTATION_STOCK -- which PFT should be used for plantation ! ! (used only when IANTH_DISTURB = 1) ! ! ! ! PFT table ! !---------------------------------------------------------------------------------------! ! 1 - C4 grass | 9 - early temperate deciduous ! ! 2 - early tropical | 10 - mid temperate deciduous ! ! 3 - mid tropical | 11 - late temperate deciduous ! ! 4 - late tropical | 12:15 - agricultural PFTs ! ! 5 - temperate C3 grass | 16 - Subtropical C3 grass ! ! 6 - northern pines | (C4 grass with C3 photo). ! ! 7 - southern pines | 17 - "Araucaria" (non-optimised ! ! 8 - late conifers | Southern Pines). ! !---------------------------------------------------------------------------------------! NL%INCLUDE_THESE_PFT = 1,2,3,4,16 NL%AGRI_STOCK = 1 NL%PLANTATION_STOCK = 3 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! PFT_1ST_CHECK -- What to do if the initialisation file has a PFT that is not listed ! ! in INCLUDE_THESE_PFT (ignored if IED_INIT_MODE is -1 or 0) ! ! 0. Stop the run ! ! 1. Add the PFT in the INCLUDE_THESE_PFT list ! ! 2. Ignore the cohort ! !---------------------------------------------------------------------------------------! NL%PFT_1ST_CHECK = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the size of sub-polygon structures in ED-2. ! ! MAXSITE -- This is the strict maximum number of sites that each polygon can ! ! contain. Currently this is used only when the user wants to run ! ! the same polygon with multiple soil types. If there aren't that ! ! many different soil types with a minimum area (check MIN_SITE_AREA ! ! below), then the model will allocate just the amount needed. ! ! MAXPATCH -- If number of patches in a given site exceeds MAXPATCH, force patch ! ! fusion. If MAXPATCH is 0, then fusion will never happen. If ! ! MAXPATCH is negative, then the absolute value is used only during ! ! the initialization, and fusion will never happen again. Notice ! ! that if the patches are too different, then the actual number of ! ! patches in a site may exceed MAXPATCH. ! ! MAXCOHORT -- If number of cohorts in a given patch exceeds MAXCOHORT, force ! ! cohort fusion. If MAXCOHORT is 0, then fusion will never happen. ! ! If MAXCOHORT is negative, then the absolute value is used only ! ! during the initialization, and fusion will never happen again. ! ! Notice that if the cohorts are too different, then the actual ! ! number of cohorts in a patch may exceed MAXCOHORT. ! ! MIN_SITE_AREA -- This is the minimum fraction area of a given soil type that allows ! ! a site to be created (ignored if IED_INIT_MODE is set to 3). ! ! MIN_PATCH_AREA -- This is the minimum fraction area of a given soil type that allows ! ! a site to be created (ignored if IED_INIT_MODE is set to 3). ! !---------------------------------------------------------------------------------------! NL%MAXSITE = 1 NL%MAXPATCH = 10 NL%MAXCOHORT = 40 NL%MIN_SITE_AREA = 0.005 NL%MIN_PATCH_AREA = 0.005 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ZROUGH -- constant roughness, in metres, if for all domain ! !---------------------------------------------------------------------------------------! NL%ZROUGH = 0.1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Treefall disturbance parameters. ! ! TREEFALL_DISTURBANCE_RATE -- Sign-dependent treefall disturbance rate: ! ! > 0. usual disturbance rate, in 1/years; ! ! = 0. No treefall disturbance; ! ! < 0. Treefall will be added as a mortality rate (it ! ! will kill plants, but it won't create a new patch). ! ! TIME2CANOPY -- Minimum patch age for treefall disturbance to happen. ! ! If TREEFALL_DISTURBANCE_RATE = 0., this value will be ! ! ignored. If this value is different than zero, then ! ! TREEFALL_DISTURBANCE_RATE is internally adjusted so the ! ! average patch age is still 1/TREEFALL_DISTURBANCE_RATE ! !---------------------------------------------------------------------------------------! NL%TREEFALL_DISTURBANCE_RATE = 0.014 NL%TIME2CANOPY = 0.0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! RUNOFF_TIME -- In case a temporary surface water (TSW) is created, this is the "e- ! ! -folding lifetime" of the TSW in seconds due to runoff. If you don't ! ! want runoff to happen, set this to 0. ! !---------------------------------------------------------------------------------------! NL%RUNOFF_TIME = 3600.0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the minimum values of various velocities in the ! ! canopy. This is needed to avoid the air to be extremely still, or to avoid singular- ! ! ities. When defining the values, keep in mind that UBMIN >= UGBMIN >= USTMIN. ! ! ! ! UBMIN -- minimum wind speed at the top of the canopy air space [ m/s] ! ! UGBMIN -- minimum wind speed at the leaf level [ m/s] ! ! USTMIN -- minimum friction velocity, u*, in m/s. [ m/s] ! !---------------------------------------------------------------------------------------! NL%UBMIN = 0.65 NL%UGBMIN = 0.25 NL%USTMIN = 0.05 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Control parameters for printing to standard output. Any variable can be printed ! ! to standard output as long as it is one dimensional. Polygon variables have been ! ! tested, no gaurtantees for other hierarchical levels. Choose any variables that are ! ! defined in the variable table fill routine in ed_state_vars.f90. Choose the start ! ! and end index of the polygon,site,patch or cohort. It should work in parallel. The ! ! indices are global indices of the entire domain. The are printed out in rows of 10 ! ! columns each. ! ! ! ! IPRINTPOLYS -- 0. Do not print information to screen ! ! 1. Print polygon arrays to screen, use variables described below to ! ! determine which ones and how ! ! NPVARS -- Number of variables to be printed ! ! PRINTVARS -- List of variables to be printed ! ! PFMTSTR -- The standard fortran format for the prints. One format per variable ! ! IPMIN -- First polygon (absolute index) to be print ! ! IPMAX -- Last polygon (absolute index) to print ! !---------------------------------------------------------------------------------------! NL%IPRINTPOLYS = 0 NL%NPVARS = 1 NL%PRINTVARS = 'AVG_PCPG','AVG_CAN_TEMP','AVG_VAPOR_AC','AVG_CAN_SHV' NL%PFMTSTR = 'f10.8','f5.1','f7.2','f9.5' NL%IPMIN = 1 NL%IPMAX = 60 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Variables that control the meteorological forcing. ! ! ! ! IMETTYPE -- Format of the meteorological dataset ! ! 0. ASCII (deprecated) ! ! 1. HDF5 ! ! ISHUFFLE -- How to choose an year outside the meterorological data range (see ! ! METCYC1 and METCYCF). ! ! 0. Sequentially cycle over years ! ! 1. Randomly pick the years, using the same sequence. This has worked ! ! with gfortran running in Mac OS X system, but it acts like option 2 ! ! when running ifort. ! ! 2. Randomly pick the years, choosing a different sequence each time ! ! the model is run. ! ! IMETCYC1 -- First year with meteorological information ! ! IMETCYCF -- Last year with meteorological information ! ! IMETAVG -- How the input radiation was originally averaged. You must tell this ! ! because ED-2.1 can make a interpolation accounting for the cosine of ! ! zenith angle. ! ! -1. I don't know, use linear interpolation. ! ! 0. No average, the values are instantaneous ! ! 1. Averages ending at the reference time ! ! 2. Averages beginning at the reference time ! ! 3. Averages centred at the reference time ! ! IMETRAD -- What should the model do with the input short wave radiation? ! ! 0. Nothing, use it as is. ! ! 1. Add them together, then use the SiB method to break radiation down ! ! into the four components (PAR direct, PAR diffuse, NIR direct, ! ! NIR diffuse). ! ! 2. Add then together, then use the method by Weiss and Norman (1985) ! ! to break radiation down to the four components. ! ! 3. Gloomy -- All radiation goes to diffuse. ! ! 4. Sesame street -- all radiation goes to direct, except at night. ! ! INITIAL_CO2 -- Initial value for CO2 in case no CO2 is provided at the meteorological ! ! driver dataset [Units: µmol/mol] ! !---------------------------------------------------------------------------------------! NL%IMETTYPE = 1 NL%ISHUFFLE = 0 NL%METCYC1 = 2000 NL%METCYCF = 2003 NL%IMETAVG = 1 NL%IMETRAD = 2 NL%INITIAL_CO2 = 378.0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the phenology prescribed from observations: ! ! ! ! IPHENYS1 -- First year for spring phenology ! ! IPHENYSF -- Final year for spring phenology ! ! IPHENYF1 -- First year for fall/autumn phenology ! ! IPHENYFF -- Final year for fall/autumn phenology ! ! PHENPATH -- path and prefix of the prescribed phenology data. ! ! ! ! If the years don't cover the entire simulation period, they will be recycled. ! !---------------------------------------------------------------------------------------! NL%IPHENYS1 = 1992 NL%IPHENYSF = 2003 NL%IPHENYF1 = 1992 NL%IPHENYFF = 2003 NL%PHENPATH = '/n/moorcroft_data/data/ed2_data/phenology/phenology' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! These are some additional configuration files. ! ! IEDCNFGF -- XML file containing additional parameter settings. If you don't have ! ! one, leave it empty ! ! EVENT_FILE -- file containing specific events that must be incorporated into the ! ! simulation. ! ! PHENPATH -- path and prefix of the prescribed phenology data. ! !---------------------------------------------------------------------------------------! NL%IEDCNFGF = 'config.xml' NL%EVENT_FILE = 'myevents.xml' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are used to control the detailed output for debugging ! ! purposes. ! ! ! ! IDETAILED -- This flag controls the possible detailed outputs, mostly used for ! ! debugging purposes. Notice that this doesn't replace the normal debug- ! ! ger options, the idea is to provide detailed output to check bad ! ! assumptions. The options are additive, and the indices below represent ! ! the different types of output: ! ! ! ! 1 -- Detailed budget (every DTLSM) ! ! 2 -- Detailed photosynthesis (every DTLSM) ! ! 4 -- Detailed output from the integrator (every HDID) ! ! 8 -- Thermodynamic bounds for sanity check (every DTLSM) ! ! 16 -- Daily error stats (which variable caused the time step to shrink) ! ! 32 -- Allometry parameters, and minimum and maximum sizes ! ! (two files, only at the beginning) ! ! ! ! In case you don't want any detailed output (likely for most runs), set ! ! IDETAILED to zero. In case you want to generate multiple outputs, add ! ! the number of the sought options: for example, if you want detailed ! ! photosynthesis and detailed output from the integrator, set IDETAILED ! ! to 6 (2 + 4). Any combination of the above outputs is acceptable, al- ! ! though all but the last produce a sheer amount of txt files, in which ! ! case you may want to look at variable PATCH_KEEP. It is also a good ! ! idea to set IVEGT_DYNAMICS to 0 when using the first five outputs. ! ! ! ! ! ! PATCH_KEEP -- This option will eliminate all patches except one from the initial- ! ! isation. This is only used when one of the first five types of ! ! detailed output is active, otherwise it will be ignored. Options are: ! ! -2. Keep only the patch with the lowest potential LAI ! ! -1. Keep only the patch with the highest potential LAI ! ! 0. Keep all patches. ! ! > 0. Keep the patch with the provided index. In case the index is ! ! not valid, the model will crash. ! !---------------------------------------------------------------------------------------! NL%IDETAILED = 0 NL%PATCH_KEEP = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IOPTINPT -- Optimization configuration. (Currently not used) ! !---------------------------------------------------------------------------------------! !NL%IOPTINPT = '' !---------------------------------------------------------------------------------------! NL%IOOUTPUT = 3 NL%IADD_SITE_MEANS = 0 NL%IADD_PATCH_MEANS = 0 NL%IADD_COHORT_MEANS = 0 NL%GROWTH_RESP_SCHEME = 0 NL%STORAGE_RESP_SCHEME = 0 NL%PLANT_HYDRO_SCHEME = 0 NL%ISTOMATA_SCHEME = 0 NL%ISTRUCT_GROWTH_SCHEME = 0 NL%TRAIT_PLASTICITY_SCHEME = 0 NL%IDDMORT_SCHEME = 0 NL%CBR_SCHEME = 0 NL%DDMORT_CONST = 0 NL%ICANRAD = 1 NL%DT_CENSUS = 60 NL%YR1ST_CENSUS = 2000 NL%MON1ST_CENSUS = 6 NL%MIN_RECRUIT_DBH = 50 $END !==========================================================================================! !==========================================================================================! Convert the docker image to singularity singularity pull docker://pecan/model-ed2-git Finally you can run the singularity image singularity exec -B ed_inputs:/data/ed_inputs -B faoOLD:/data/faoOLD -B oge2OLD:/data/oge2OLD -B sites:/data/sites -B testrun.s83:/data/testrun.s83 --pwd /data/testrun.s83 ./model-ed2-git.simg ed2.git -s Note that the -B option will mount the folder into the singularity image as the second argument (afther the :) The ed2.git command is started with the -s which will run it in single mode, and not initialize and use MPI. Once the model is finished the outputs should be available under testrun.s83. The example ED2IN file is not 100% correct and will result in the following error: output of (failed) run (click to expand) +---------------- MPI parallel info: --------------------+ + - Machnum = 0 + - Machsize = 1 +---------------- OMP parallel info: --------------------+ + - thread use: 1 + - threads max: 1 + - cpu use: 1 + - cpus max: 1 + Note: Max vals are for node, not sockets. +--------------------------------------------------------+ Reading namelist information Copying namelist !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!! WARNING! WARNING! WARNING! WARNING! WARNING! !!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -> Outfast cannot be less than frqfast. Oufast was redefined to 3600. seconds. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!! WARNING! WARNING! WARNING! WARNING! WARNING! !!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -> Outstate cannot be different than frqstate when unitstate is set to 3 (years). Oustate was set to 1. years. Oustate was redefined to 1. years. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +------------------------------------------------------------+ | Ecosystem Demography Model, version 2.2 +------------------------------------------------------------+ | Input namelist filename is ED2IN | | Single process execution on INITIAL run. +------------------------------------------------------------+ => Generating the land/sea mask. /data/oge2OLD/OGE2_HEADER -> Getting file: /data/oge2OLD/OGE2_30S060W.h5... + Work allocation, node 1; + Polygon array allocation, node 1; + Memory successfully allocated on none 1; [+] Load_Ed_Ecosystem_Params... ---------------------------------------- Treefall disturbance parameters: - LAMBDA_REF = 1.40000E-02 - LAMBDA_EFF = 1.40000E-02 - TIME2CANOPY = 0.00000E+00 ---------------------------------------- [+] Checking for XML config... ********************************************* ** WARNING! ** ** ** ** XML file wasn't found. Using default ** ** parameters in ED. ** ** (You provided config.xml). ** ** ********************************************* [+] Alloc_Soilgrid... [+] Set_Polygon_Coordinates... [+] Sfcdata_ED... [+] Load_Ecosystem_State... + Doing sequential initialization over nodes. + Initializing from ED restart file. Node: 001 [-] filelist_f: Checking prefix: /data/sites/Santarem_Km83/s83_default. + Showing first 10 files: [-] File #: 1 /data/sites/Santarem_Km83/s83_default.lat-3.018lon-54.971.css [-] File #: 2 /data/sites/Santarem_Km83/s83_default.lat-3.018lon-54.971.pss Using patch file: /data/sites/Santarem_Km83/s83_default.lat-3.018lon-54.971.pss Using cohort file: /data/sites/Santarem_Km83/s83_default.lat-3.018lon-54.971.css + Initializing phenology. Node: 001 - Reading thermal sums. + Initializing anthropogenic disturbance forcing. Node: 001 -------------------------------------------------------- Soil information: Polygon name : ts83 Longitude : -54.971 Latitude : -3.018 Prescribed sand and clay : T # of sites : 1 Site : 1 - Type : 16 - Clay fraction = 5.90000E-01 - Sand fraction = 3.90000E-01 - Silt fraction = 2.00000E-02 - SLBS = 1.22460E+01 - SLPOTS = -1.52090E-01 - SLCONS = 2.30320E-06 - Dry air soil = 2.29248E-01 - Wilting point = 2.43249E-01 - Field capacity = 3.24517E-01 - Saturation = 4.27790E-01 - Heat capacity = 1.30601E+06 -------------------------------------------------------- [+] Init_Met_Drivers... [+] Read_Met_Drivers_Init... ------------------------------ - METCYC1 = 2000 - METCYCF = 2003 - NYEARS = 2 ------------------------------ IYEAR YEAR_USE 1 2001 2 2002 ------------------------------ [+] Update_met_drivers... [+] Ed_Init_Atm... Total count in node 1 for grid 1 : POLYGONS= 1 SITES= 1 PATCHES= 18 COHORTS= 1753 Grid: 1 Poly: 1 Lon: -54.9710 Lat: -3.0180 Nplants: 0.73 Avg. LAI: 4.45 NPatches: 18 NCohorts: 660 [+] initHydrology... initHydrology | mynum= 1 ngrids= 1 mpolys= 1 msites= 1 Allocated | mynum= 1 ngrids= 1 mpolys= 1 msites= 1 Updated | mynum= 1 ngrids= 1 mpolys= 1 msites= 1 Deallocated | mynum= 1 ngrids= 1 mpolys= 1 msites= 1 [+] Filltab_Alltypes... [+] Finding frqsum... [+] Loading obstime_list File /nowhere not found! Specify OBSTIME_DB properly in ED namelist. :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -------------------------------------------------------------- !!! FATAL ERROR !!! -------------------------------------------------------------- ---> File: ed_init.F90 ---> Subroutine: read_obstime ---> Reason: OBSTIME_DB not found! -------------------------------------------------------------- ED execution halts (see previous error message)... -------------------------------------------------------------- STOP fatal_error "],["sibcasa.html", "35 SiBCASA", " 35 SiBCASA Model Information Home Page Source Code Contact Kevin Schaefer at (kevin.schaefer[at]nsidc.org License Authors Kevin Shaefer PEcAn Integration Tony Gardella, Yasin Elshorbany Introduction SiBCASA was created from the combination of the Carnegie-Ames_Stanford Approach (CASA) and the Simple Biosphere(SiB) model.They have added other features, including dynamic allocation of GPP for the growth and maintance of leaves, roots, and wood andd explicit calculation of autotrophic respiration. Schaefer, Kevin, et al. “The combined Simple Biosphere/Carnegie-Ames-Stanford Approach (SiBCASA) Model.” J. Geophys. Res 113: G03034. PEcAn configuration file additions Model specific input files Software libraries- PGI 16.5 or later https://www.pgroup.com/products/community.htm NETCDF 3.6.0 Input - Driver data: Made to use CRUNCEP data by default. Param File Initial Condition File Model configuration files The namel_sibdrv is the namelist file that controls input and output file locations. Installation notes Update $PATH to the netcdf library version 3.6 (compiled with the same PGI Fortran compiler) in: PEcAn/sim_setup/Makefile & PEcAn/SiB/sibcasa_arctic4/Makefile Change paths in: Line 379: /SiB/sibcasa_ arctic4/read_ti.F90 Line 260, 273, 286: /SiB/sibcasa_ arctic4/sibdrv_read_ncep2.F90 Update $PATH variables in: /SiB/sibcasa_arctic4/sibmerg.in IMPORTANT: update “fmt_type=‘sing_proc’” even if running on parallel computing system, since the file names will otherwise be incompatible with the outputted data..check Compile code: in ~/SiB/sibcasa_arctic4/ Make clean Make VM Currently Not Avialable on the VM "],["404.html", "Page not found", " Page not found The page you requested cannot be found (perhaps it was moved or renamed). You may want to try searching to find the page's new location, or use the table of contents to find the page you are looking for. "]]
+[["index.html", "The Predictive Ecosystem Analyzer Welcome", " The Predictive Ecosystem Analyzer By: PEcAn Team 2024-03-04 Welcome Ecosystem science, policy, and management informed by the best available data and models Our Mission: Develop and promote accessible tools for reproducible ecosystem modeling and forecasting PEcAn Website Public Chat Room Github Repository .page-inner{max-width:80% !important;} "],["project-overview.html", "1 Project Overview", " 1 Project Overview The Predictive Ecosystem Analyzer (PEcAn) is an integrated informatics toolbox for ecosystem modeling (Dietze et al. 2013, LeBauer et al. 2013). PEcAn consists of: An application program interface (API) that encapsulates an ecosystem model, providing a common interface, inputs, and output. Core utilities for handling and tracking model runs and the flows of information and uncertainties into and out of models and analyses An accessible web-based user interface and visualization tools An extensible collection of modules to handle specific types of analyses (sensitivity, uncertainty, ensemble), model-data syntheses (benchmarking, parameter data assimilation, state data assimilation), and data processing (model inputs and data constraints) This project is motivated by the fact that many of the most pressing questions about global change are limited by our ability to synthesize existing data and strategically prioritize the collection of new data. This project seeks to improve this ability by developing a framework for integrating multiple data sources in a sensible manner. The workflow system allows ecosystem modeling to be more reproducible, automated, and transparent in terms of operations applied to data, and thus ultimately more comprehensible to both peers and the public. It reduces the redundancy of effort among modeling groups, facilitate collaboration, and make models more accessible the rest of the research community. PEcAn is not itself an ecosystem model, and it can be used to with a variety of different ecosystem models; integrating a model involves writing a wrapper to convert inputs and outputs to and from the standards used by PEcAn. Currently, PEcAn supports multiple models listed PEcAn Models. Acknowledgements The PEcAn project is supported financially by the following: National Science Foundation (NSF): 1062547 1062204 1241894 1261582 1318164 1346748 1458021 1638577 1655095 1702996 National Aeronautics and Space Administration (NASA) NNX14AH65G NNX16AO13H 80NSSC17K0711 Advanced Research Projects Agency-Energy (ARPA-E) DE-AR0000594 Department of Defense, Strategic Environmental Research and Development Program (DOD-SERDP), grant RC2636 Energy Biosciences Institute, University of Illinois Amazon Web Services (AWS) Google Summer of Code BETYdb is a product of the Energy Biosciences Institute at the University of Illinois at Urbana-Champaign. We gratefully acknowledge the great effort of other researchers who generously made their own data available for further study. PEcAn is a collaboration among research groups at the Department of Earth And Environment at Boston University, the Energy Biosciences Institute at the University of Illinois, the Image Spatial Data Analysis group at NCSA, the Department of Atmospheric & Oceanic Sciences at the University Wisconsin-Madison, the Terrestrial Ecosystem Science & Technology (TEST) Group at Brookhaven National Laboratory, and the Joint Global Change Research Institute (JGCRI) at the Pacific Northwest National Laboratory. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the NSF, NASA, Boston University, University of Illinois, Brookhaven National Lab, Pacific National Lab, Battelle, the US Department of Defense, or the US Department of Energy. PEcAn Publications Fer I, R Kelly, P Moorcroft, AD Richardson, E Cowdery, MC Dietze. 2018. Linking big models to big data: efficient ecosystem model calibration through Bayesian model emulation. Biogeosciences Discussions doi:10.5194/bg-15-5801-2018 Feng X, Uriarte M, González G, et al. Improving predictions of tropical forest response to climate change through integration of field studies and ecosystem modeling. Glob Change Biol. 2018;24:e213–e232.doi:10.1111/gcb.13863 Dietze, M. C. (2017), Prediction in ecology: a first-principles framework. Ecol Appl, 27: 2048-2060. doi:10.1002/eap.1589 Fisher RA, Koven CD, Anderegg WRL, et al. 2017. Vegetation demographics in Earth System Models: A review of progress and priorities. Glob Change Biol. doi:10.1111/gcb.13910 Rollinson, C. R., Liu, Y., Raiho, A., Moore, D. J.P., McLachlan, J., Bishop, D. A., Dye, A., Matthes, J. H., Hessl, A., Hickler, T., Pederson, N., Poulter, B., Quaife, T., Schaefer, K., Steinkamp, J. and Dietze, M. C. (2017), Emergent climate and CO2 sensitivities of net primary productivity in ecosystem models do not agree with empirical data in temperate forests of eastern North America. Glob Change Biol. Accepted Author Manuscript. doi:10.1111/gcb.13626 LeBauer, D., Kooper, R., Mulrooney, P., Rohde, S., Wang, D., Long, S. P. and Dietze, M. C. (2017), betydb: a yield, trait, and ecosystem service database applied to second-generation bioenergy feedstock production. GCB Bioenergy. doi:10.1111/gcbb.12420 Rogers A, BE Medlyn, J Dukes, G Bonan, S von Caemmerer, MC Dietze, J Kattge, ADB Leakey, LM Mercado, U Niinemets, IC Prentice, SP Serbin, S Sitch, DA Way, S Zaehle. 2017. “A Roadmap for Improving the Representation of Photosynthesis in Earth System Models” New Phytologist 213(1):22-42 doi:10.1111/nph.14283 Shiklomanov. A, MC Dietze, T Viskari, PA Townsend, SP Serbin. 2016 “Quantifying the influences of spectral resolution on uncertainty in leaf trait estimates through a Bayesian approach to RTM inversion” Remote Sensing of the Environment 183: 226-238. doi:10.1016/j.rse.2016.05.023 Viskari et al. 2015 Model-data assimilation of multiple phenological observations to constrain and forecast leaf area index. Ecological Applications 25(2): 546-558. doi:10.1890/14-0497.1 Dietze, M. C., S. P. Serbin, C. Davidson, A. R. Desai, X. Feng, R. Kelly, R. Kooper, D. LeBauer, J. Mantooth, K. McHenry, and D. Wang (2014) A quantitative assessment of a terrestrial biosphere model’s data needs across North American biomes. Journal of Geophysical Research-Biogeosciences doi:10.1002/2013jg002392 LeBauer, D.S., D. Wang, K. Richter, C. Davidson, & M.C. Dietze. (2013). Facilitating feedbacks between field measurements and ecosystem models. Ecological Monographs. doi:10.1890/12-0137.1 Wang, D, D.S. LeBauer, and M.C. Dietze(2013) Predicting yields of short-rotation hybrid poplar (Populus spp.) for the contiguous US through model-data synthesis. Ecological Applications doi:10.1890/12-0854.1 Dietze, M.C., D.S LeBauer, R. Kooper (2013) On improving the communication between models and data. Plant, Cell, & Environment doi:10.1111/pce.12043 PEcAn Project Google Scholar page Longer / auto-updated list of publications that mention PEcAn’s full name in Google Scholar "],["contributor-covenant-code-of-conduct.html", "2 Contributor Covenant Code of Conduct", " 2 Contributor Covenant Code of Conduct Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. Our Standards Examples of behavior that contributes to creating a positive environment include: Using welcoming and inclusive language Being respectful of differing viewpoints and experiences Gracefully accepting constructive criticism Focusing on what is best for the community Showing empathy towards other community members Examples of unacceptable behavior by participants include: The use of sexualized language or imagery and unwelcome sexual attention or advances Trolling, insulting/derogatory comments, and personal or political attacks Public or private harassment Publishing others’ private information, such as a physical or electronic address, without explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at pecanproj[at]gmail.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership. Attribution This Code of Conduct is adapted from the Contributor Covenant version 1.4, available at http://contributor-covenant.org/version/1/4. "],["about-the-pecan-book.html", "3 About the PEcAn Book", " 3 About the PEcAn Book This book serves as documentation for the PEcAn Project. It contains descriptions of topics necessary to inform a beginner and advanced user as well as requisite materials for developers. It does not contain low-level descriptions of functions within PEcAn. Our aim for this documentation is to educate you about the PEcAn software, the possibilities of its usage, and the standards,expectations, and core workflows for developers. This book is organized four main topics: Introduction - Brief explanation of PEcAn, how to obtain the PEcAn VM, and explanation of basic web interface functions. Tutorials/Demos/Workflows - All User and Developer tutorials/demos/workflows to explain how to use and add to PEcAn in different ways. Topical Pages - Explanation of main PEcAn components and how they fit together. Appendix - External documentation and sources of information and a FAQ section. "],["general-feedbackcommentssuggestions.html", "3.1 General Feedback/Comments/Suggestions", " 3.1 General Feedback/Comments/Suggestions We want your ideas, thoughts, comments, and suggestions! As a community we are committed to creating an inclusive and supportive atmosphere so we all to reach out to us in the following ways: Github: https://github.com/PecanProject/pecan This is the main hub of communication surrounding PEcAn development. Check out the issues section to see known bugs, upcoming features, and ideas for future development. Feel free to comment on existing issues or open new ones with questions, bug reports, feature requests, and/or ideas. Slack: https://pecanproject.slack.com/ Slack serves as our day to day mode of communication. To join us in slack you will need to create an account first. This is done in 3 steps: Request an inivitation to join Slack, this will be send by email to address you provided. Check your inbox for an email from Slack with subject “Rob Kooper has invited you to join a Slack workspace”. This email should have a link that you can click to join slack. When you click a webpage will open up that asks you to create an account, once that is done you can login into the slack chatrooms. Email: pecanproj[at]gmail.com If you do not wish your communication with the team to be public, send us an email at the address above and we will get back to you as soon as possible. "],["bookediting.html", "3.2 Editing this book", " 3.2 Editing this book The file organization of this documentation can be described simply as follows: Each chapter is in its own file (within the corresponding section). Each group of chapters (i.e. “part” in LaTeX) is in its own directory. Sections and chapters are rendered (and numbered) in alpha-numerical order of their corresponding file names. Therefore, each section directory and chapter file name should be prefixed with a two-digit (zero-padded) number. File and directory names should be as similar as possible to the name of the corresponding chapter or section. For instance, the file name for this chapter’s source file is 06_reference/10_editing_this_book.Rmd. This numbering means that if you need to create an additional chapter before an existing one, you will have to renumber all chapters following it. To ensure correct rendering, you should also make sure that each chapter starts with a level 1 heading (# heading). For instance, this chapter’s source starts with: # Editing this book {#bookediting} The file organization of this documentation can be described simply as follows: ... Furthermore, to keep the organization consistent, each chapter should have exactly one level 1 heading (i.e. do not combine multiple chapters into a single file). In other words, do not spread a single chapter across multiple files, and do not put multiple chapters in the same file. Each section directory has a file starting with 00 that contains only the section (or “Part”) title. This is used to create the greyed-out section headers in the rendered HTML. For instance, this section has a file called 00_introduction.Rmd which contains only the following: # (PART) Introduction {-} To cross-reference a different section, use that section’s unique tag (starts with #; appears next to the section heading surrounded in curly braces). For instance, the following Markdown contains two sections that cross-reference each other: ## Introduction {#intro} Here is the intro. This is a link to the [next section](#section-one). ## First section. {#section-one} As mentioned in the [previous section](#intro). If no header tag exists for a section you want to cross-reference, you should create one. We have no strict rules about this, but it’s useful to have tags that give some sense of their parent hierarchy and reference their parent sections (e.g. #models, #models-ed, and #models-ed-xml to refer to a chapter on models, with a subsection on ED and a sub-subsection on ED XML configuration). If section organization changes, it is fine to move header tags, but avoid altering existing tags as this will break all the links pointing to that tag. (Note that it is also possible to link to section headings by their exact title. However, this is not recommended because these section titles could change, which would break the links.) When referring to PEcAn packages or specific functions, it is a good idea to link to the rendered package documentation. For instance, here are links to the models/ed package, the PEcAn.ED2::modify_ed2in function, and the PEcAnRTM package vignette. If necessary, you can also link directly to specific lines or blocks in the source code on GitHub, like this. (To get a link to a line, click its line number. To then select a block, shift-click another line number.) To insert figures, use knitr::include_graphics(\"path/to/figure.png\") inside an R code chunk. For example: ```{r} knitr::include_graphics("04_advanced_user_guide/images/Input_ID_name.png") ``` Note that image file names are relative to the book_source directory, NOT to the markdown file. In other words, if myimage.png was in the same directory as this file, I would still have to reference it as 06_reference/myimage.png – I could not just do myimage.png. The size, caption, and other properties of the rendered image can be controlled via chunk options. For additional information about how bookdown works (including information about its syntax), see the Bookdown free online book. "],["how-to-create-your-own-version-of-documentation.html", "3.3 How to create your own version of Documentation", " 3.3 How to create your own version of Documentation To create your own version of documentation you’ll need to follow these steps: These procedures assume you have an github account, you forked pecan, you have cloned pecan locally, and have a TRAVIS account. 1. Create a repository under your github account with the name “pecan-documentation”. Clear it of any files. Set up the repository with Github Pages by going to the settings tab for that repository. 2. Create a personal access token for github: https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line and copy it. 3. Create a TRAVIS environment variable called GITHUB_PAT and save the access token you made as a secret variable. 4. Create a branch from your local pecan repository with a name that starts with release/. (ie. Release/vtonydoc) 5. Make whichever changes you would like to the documentation and push it up to your fork. From here TRAVIS will build your documentation. The web version of your documentation will be rendered with the url following the structure: username.github.io/pecan-documentation/pattern_after_release/ "],["pecan-manual-setup.html", "4 Install PEcAn", " 4 Install PEcAn These instructions are provided to document how to install and setup PEcAn. It includes: Virtual machine (Recommended for students and new users) PEcAn Docker (Recommended for development and production deployment) Local installation from r-universe (Recommended for development only) PEcAn OS specific installation The PEcAn code and necessary infrastructure can be obtained and compiled in different ways. This set of instructions will help facilitate your path and the steps necessary to move forward to have a fully a functioning PEcAn environment. "],["install-vm.html", "4.1 Virtual Machine (VM)", " 4.1 Virtual Machine (VM) The PEcAn virtual machine consists of all of PEcAn pre-compiled within a Linux operating system and saved in a “virtual machine” (VM). Virtual machines allow for running consistent set-ups without worrying about differences between operating systems, library dependencies, compiling the code, etc. Install VirtualBox This is the software that runs the virtual machine. You can find the download link and instructions at https://www.virtualbox.org. NOTE: On Windows you may see a warning about Logo testing, it is okay to ignore the warning. Download the PEcAn VM You can find the download link at http://opensource.ncsa.illinois.edu/projects/artifacts.php?key=PECAN, under the “Files” header. Click the “.ova” file to begin the download. Note that the file is ~7 GB, so this download can take several minutes to hours depending on your connection speed. Also, the VM requires >4 GB of RAM to operate correctly. Please check current usage of RAM and shutdown processes as needed. Import the VM Once the download is complete, open VirtualBox. In the VirtualBox menus, go to “File” → “Import Appliance” and locate the downloaded “.ova” file. For Virtualbox version 5.x: In the Appliance Import Settings, make sure you select “Reinitialize the MAC address of all network cards” (picture below). This is not selected by default and can result in networking issues since multiple machines might claim to have the same network MAC Address. For Virtualbox versions starting with 6.0, there is a slightly different interface (see figure). Select “Generate new MAC addresses for all network adapters” from the MAC Address Policy: NOTE: If you experience network connection difficulties in the VM with this enabled, try re-importing the VM without this setting selected). Finally, click “Import” to build the Virtual Machine from its image. Launch PEcAn Double click the icon for the PEcAn VM. A terminal window will pop up showing the machine booting up which may take a minute. It is done booting when you get to the pecan login: prompt. You do not need to login as the VM behaves like a server that we will be accessing through you web browser. Feel free to minimize the VM window. If you do want to login to the VM, the credentials are as follows: username: carya, password: illinois (after the pecan tree, [Carya illinoinensis][pecan-wikipedia]). Open the PEcAn web interface With the VM running in the background, open any web browser on the same machine and navigate to localhost:6480/pecan/ to start the PEcAn workflow. (NOTE: The trailing backslash may be necessary depending on your browser) Advanced interaction with the VM is mostly done through the command line. You can perform these manipulations from inside the VM window. However, the VM is also configured for SSH access (username carya, hostname localhost, port 6422). For instance, to open an interactive shell inside the VM from a terminal on the host machine, use a command like ssh -l carya -p 6422 localhost (when prompted, the password is illinois, as above). These steps should be enough to get you started with running models and performing basic analyses with PEcAn. For advanced details on VM configuration and maintenance, see the Advanced VM section. "],["install-docker.html", "4.2 Docker", " 4.2 Docker This is a short documentation on how to start with Docker and PEcAn. This will not go into much detail about about how to use Docker – for more details, see the main Docker topical page. Install Docker. Follow the instructions for your operating system at https://www.docker.com/community-edition#/download. Once Docker is installed, make sure it is running. To test that Docker is installed and running, open a terminal and run the following commands: docker run hello-world If successful, this should return a message starting with \"Hello from Docker!\". If this doesn’t work, there is something wrong with your configuration. You may need to open the Docker desktop app to complete the installation. Refer to the Docker documentation for debugging. NOTE: Depending on how Docker is installed and configured, you may have to run this command as sudo. Try running the command without sudo first. If that fails, but running as sudo succeeds, see these instructions for steps to use Docker as a non-root user. Install docker compose. If you are running this on a Mac or Windows this might be already installed. On Linux you will need to install this it separately; see https://docs.docker.com/compose/install/. To see if docker compose is successfully installed, use the following shell command: docker compose version This should print the current version of docker-compose. We have tested the instruction below with versions of docker compose 1.22 and above. Download the PEcAn docker compose file. It is located in the root directory of the PEcAn source code. For reference, here are direct links to the latest stable version and the bleeding edge development version. (To download the files, you should be able to right click the link and select “Save link as”.) Make sure the file is saved as docker-compose.yml in a directory called pecan. Initialize the PEcAn database and data images. The following docker compose commands are used to download all the data PEcAn needs to start working. For more on how they work, see our Docker topical pages. Create and start the PEcAn database container (without any data) docker compose up -d postgres If this is successful, the end of the output should look like the following: Creating pecan-postgres-1 ... done “Initialize” the data for the PEcAn database. docker run --rm --network pecan_pecan pecan/db This should produce a lot of output describing the database operations happening under the hood. Some of these will look like errors, but this is normal. This command succeeded if the output ends with the following (the syntax for creating a new user for accessing BetyDB): docker compose run bety user 'login' 'password' 'full name' 'email' 1 1 Add a user to BetyDB using the example syntax provided as the last line of the output of the previous step: # guest user docker compose run --rm bety user guestuser guestuser "Guest User" guestuser@example.com 4 4 # example user docker compose run --rm bety user carya illinois "Carya Demo User" carya@example.com 1 1 Download and configure the core PEcAn database files. docker run -ti --rm --network pecan_pecan --volume pecan_pecan:/data --env FQDN=docker pecan/data:develop This will produce a lot of output describing file operations. This command succeeded if the output ends with the following: ###################################################################### Done! ###################################################################### Download the pecan/docker/env.example & save it as .env file. Now, open the .env file & uncomment the following lines: COMPOSE_PROJECT_NAME=pecan PECAN_VERSION=develop Setting PECAN_VERSION=develop indicates that you want to run the bleeding-edge develop branch, meaning it may have bugs. To go ahead with the stable version you may set PECAN_VERSION=latest or PECAN_VERSION=<release-number> (For example 1.7.0). You can look at the list of all the releases of PEcAn to see what options are availble. Start the PEcAn stack. Assuming all of the steps above completed successfully, start the full stack by running the following shell command: docker compose up -d If all of the containers started successfully, you should be able to access the various components from a browser via the following URLs (if you run these commands on a remote machine replace localhost with the actual hostname). PEcAn documentation and home page – http://pecan.localhost PEcAn web interface (running models) – http://pecan.localhost/pecan/ (NOTE: The trailing slash is necessary.) BETY web interface – http://pecan.localhost/bety/ (Default username: carya, default password: illinois) RStudio interface – http://pecan.localhost/rstudio/ (Default username: carya, default password: illinois) Monitor, service that monitors models and shows all models that are online as well as how many instances are online and the number of jobs waiting. The output is in JSON – http://pecan.localhost/monitor/ To shut down the docker images run docker-compose stop. For troubleshooting and advanced configuration, see our Docker topical pages. "],["r-universe.html", "4.3 Installation From r-universe", " 4.3 Installation From r-universe PEcAn.all and all it’s dependencies can be installed from r-universe with the following code: # Enable repository from pecanproject options(repos = c( pecanproject = 'https://pecanproject.r-universe.dev', CRAN = 'https://cloud.r-project.org')) # Download and install PEcAn.all in R install.packages('PEcAn.all') If you are planning on working on PEcAn development with a local install, consider adding the above options() to a project-level .Rprofile as well. For example, by using usethis::edit_r_profile(\"project\"). This will allow devtools to install dependencies from r-universe, for example devtools::install_deps(\"models/ed\") should work when “https://pecanproject.r-universe.dev” is set as a repo. NOTE: functionality of a local install will be limited without a connection to a BETYdb instance. Instructions to install BETY. "],["install-native.html", "4.4 (Advanced) Native install", " 4.4 (Advanced) Native install The full PEcAn system has a lot of dependencies, including R packages, compiled C and Fortran libraries, and system services and utilities. Installing all of these side by side, and getting them to work harmoniously, is very time-consuming and challenging, which is why we strongly encourage new users to use the VM or Docker if possible. In a nutshell, the process for manual installation is as follows: Download the PEcAn source code from GitHub. The recommended way to do this is with the shell command git clone, i.e. git clone https://github.com/pecanproject/pecan. Download the BETY source code from GitHub. Install the PEcAn R packages and their dependencies. This can be done by running the shell command make inside the PEcAn source code directory. Note that many of the R packages on which PEcAn depends have system dependencies that are not included by default on most operating systems, but almost all of which should be available via your operating system’s package manager (e.g. Homebrew for MacOS, apt for Ubuntu/Debian/Mint Linux, yum for RedHat Fedora/CentOS). Install and configure PostgreSQL Install and configure the Apache web server. For more details, see our notes about OS Specific Installations. "],["user-section.html", "5 Tutorials ", " 5 Tutorials "],["pecan-in-a-nutshell.html", "5.1 How PEcAn Works in a nutshell", " 5.1 How PEcAn Works in a nutshell PEcAn provides an interface to a variety of ecosystem models and attempts to standardize and automate the processes of model parameterization, execution, and analysis. First, you choose an ecosystem model, then the time and location of interest (a site), the plant community (or crop) that you are interested in simulating, and a source of atmospheric data from the BETY database (LeBauer et al, 2010). These are set in a “settings” file, commonly named pecan.xml which can be edited manually if desired. From here, PEcAn will take over and set up and execute the selected model using your settings. The key is that PEcAn uses models as-is, and all of the translation steps are done within PEcAn so no modifications are required of the model itself. Once the model is finished it will allow you to create graphs with the results of the simulation as well as download the results. It is also possible to see all past experiments and simulations. There are two ways of using PEcAn, via the web interface and directly within R. Even for users familiar with R, using the web interface is a good place to start because it provides a high level overview of the PEcAn workflow. The quickest way to get started is to download the virtual machine or use an AWS instance. "],["demo-table.html", "5.2 PEcAn Demos", " 5.2 PEcAn Demos The following Tutorials assume you have installed PEcAn. If you have not, please consult the PEcAn Installation Section. Type Title Web Link Source Rmd Demo Basic Run html Rmd Demo Uncertainty Analysis html Rmd Demo Output Analysis html Rmd Demo MCMC html Rmd Demo Parameter Assimilation html Rmd Demo State Assimilation html Rmd Demo Sensitivity html Rmd Vignette Allometries html Rmd Vignette MCMC html Rmd Vignette Meteorological Data html Rmd Vignette Meta-Analysis html Rmd Vignette Photosynthetic Response Curves html Rmd Vignette Priors html Rmd Vignette Leaf Spectra:PROSPECT inversion html Rmd "],["demo-1.html", "5.3 Demo 01: Basic Run PEcAn", " 5.3 Demo 01: Basic Run PEcAn 5.3.0.1 Objective We will begin by exploring a set of web-based tools that are designed to run single-site model runs. A lot of the detail about what’s going on under the hood, and all the outputs that PEcAn produces, are left to Demo 2. This demo will also demonstrate how to use PEcAn outputs in additional analyses outside of PEcAn. 5.3.0.2 PEcAn URL In the following demo, URL is the web address of a PEcAn server and will refer to one of the following: If you are doing a live demo with the PEcAn team, URL was provided If you are running the PEcAn virtual machine: URL = localhost:6480 If you are running PEcAn using Amazon Web Services (AWS), URL is the Public IP If you are running PEcAn using Docker, URL is pecan.localhost If you followed instructions found in Install PEcAn by hand, URL is your server’s IP 5.3.0.3 Start PEcAn: Enter URL in your web browser Click the link for the PEcAn web interface Click the ‘Next’ button in the sidebar to move to the “Site Selection” page. 5.3.0.4 Site Selection 5.3.0.5 Host Select the local machine “pecan”. Other options exist if you’ve read and followed instructions found in Remote execution with PEcAn. 5.3.0.6 Model Select SIPNET (r136) from the available models because it is quick & simple. Reference material can be found in Models in PEcAn 5.3.0.7 Site Group To filter sites, you can select a specific group of sites. For this tutorial we will use Ameriflux. 5.3.0.8 Conversion: Select the conversion check box, to show all sites that PEcAn is capable of generating model drivers for automatically. By default (unchecked), PEcAn only displays sites where model drivers already exist in the system database 5.3.0.9 Site: For this tutorial, type US-NR1 in the search box to display the Niwot Ridge Ameriflux site (US-NR1), and then click on the pin icon. When you click on a site’s flag on the map, it will give you the name and location of the site and put that site in the “Site:” box on the left hand site, indicating your current selection. Once you are finished with the above steps, click “Next”. 5.3.0.10 Run Specification Next we will specify settings required to run the model. Be aware that the inputs required for any particular model may vary somewhat so there may be addition optional or required input selections available for other models. 5.3.0.11 PFT (Plant Functional Type): Niwot Ridge is temperate coniferous. Available PFTs will vary by model and some models allow multiple competing PFTs to be selected. Also select soil to control the soil parameters 5.3.0.12 Start/End Date: Select 2003/01/01 to 2006/12/31. In general, be careful to select dates for which there is available driver data. 5.3.0.13 Weather Data: For Sipnet.climna select “Use AmerifluxLBL” from the Available Meteorological Drivers. 5.3.0.14 Optional Settings: Leave all blank for demo run Email sends a message when the run is complete. Use Brown Dog will use the Brown Dog web services in order to do input file conversions. (Note: Required if you select Use NARR for Weather Data) Edit pecan.xml allows you to configure advanced settings via the PEcAn settings file Edit model config pauses the workflow after PEcAn has written all model specific settings but before the model runs are called and allows users to configure any additional settings internal to the model. Advanced Setup controls ensemble and sensitivity run settings discussed in Demo 2. Finally, click “Next” to start the model run. 5.3.0.15 Data Use Policies The last step before the run starts is to read and agree to AmeriFlux’s data policy and give a valid username. If you don’t already have an Ameriflux username, click “register here” and create one. If you selected a different data source, this step may or may not be needed: you will need to agree to a data policy if your source has one, but if it doesn’t then the run will start immediately. 5.3.0.16 If you get an error in your run If you get an error in your run as part of a live demo or class activity, it is probably simplest to start over and try changing options and re-running (e.g. with a different site or PFT), as time does not permit detailed debugging. If the source of the error is not immediately obvious, you may want to take a look at the workflow.Rout to see the log of the PEcAn workflow or the logfile.txt to see the model execution output log and then refer to the Documentation or the Chat Room for help. 5.3.0.17 Model Run Workflow 5.3.0.18 MET Process: First, PEcAn will download meteorological data based on the type of the Weather Data you chose, and process it into the specific format for the chosen model 5.3.0.19 TRAIT / META: PEcAn then estimates model parameters by performing a meta-analysis of the available trait data for a PFT. TRAIT will extract relevant trait data from the database. META performs a hierarchical Bayes meta-analysis of available trait data. The output of this analysis is a probability distribution for each model parameter. PEcAn selects the median value of this parameter as the default, but in Demo 2 we will see how PEcAn can use this parameter uncertainty to make probabilistic forecasts and assess model sensitivity and uncertainty. Errors at this stage usually indicate errors in the trait database or incorrectly specified PFTs (e.g. defining a variable twice). 5.3.0.20 CONFIG: writes model-specific settings and parameter files 5.3.0.21 MODEL: runs model. 5.3.0.22 OUTPUT: All model outputs are converted to standard netCDF format 5.3.0.23 ENSEMBLE & SENSITIVITY: If enabled post-process output for these analyses If at any point a Stage Name has the Status “ERROR” please notify the PEcAn team member that is administering the demo or feel free to do any of the following: Refer to the PEcAn Documentation for documentation Post the end of your workflow log on our Slack Channel chat Post an issue on Github. The entire PEcAn team welcomes any questions you may have! If the Finished Stage has a Status of “DONE”, congratulations! If you got this far, you have managed to run an ecosystem model without ever touching a line of code! Now it’s time to look at the results click Finished. FYI, adding a new model to PEcAn does not require modification of the model’s code, just the implementation of a wrapper function. 5.3.0.24 Output and Visualization For now focus on graphs, we will explore all of PEcAn’s outputs in more detail in Demo 02. 5.3.0.25 Graphs Select a Year and Y-axis Variable, and then click ‘Plot run/year/variable’. Initially leave the X-axis as time. Within this figure the points indicate the daily mean for the variable while the envelope encompasses the diurnal variability (max and min). Variable names and units are based on a standard netCDF format. Try looking at a number of different output variables over different years. Try changing the X-axis to look at bivariate plots of how different output variables are related to one another. Be aware that PEcAn currently runs a moving min/mean/max through bivariate plots, just as it does with time series plots. In some cases this makes more sense than others. 5.3.0.26 Alternative Visualization: R Shiny Click on Open SHINY, which will open a new browser window. The shiny app will automatically access your run’s output files and allow you to visualize all output variables as a function of time. Use the pull down menu under Variable Name to choose whichever output variable you wish to plot. 5.3.0.27 Model Run Archive Return to the output window and Click on the HISTORY button. Click on any previous run in the “ID” column to go to the current state of that run’s execution – you can always return to old runs and runs in-progress this way. The run you just did should be the more recent entry in the table. For the next analysis, make note of the ID number from your run. 5.3.0.28 Next steps 5.3.0.28.1 Analyzing model output Follow this tutorial, [Analyze Output] to learn how to open model output in R and compare to observed data 5.3.0.29 DEMO 02 Demo 02: Sensitivity and Uncertainty Analysis will show how to perform Ensemble & Sensitivity Analyses through the web interface and explore the PEcAn outputs in greater detail, including the trait meta-analysis "],["demo-2.html", "5.4 Demo 02: Sensitivity and Uncertainty Analysis", " 5.4 Demo 02: Sensitivity and Uncertainty Analysis In Demo 2 we will be looking at how PEcAn can use information about parameter uncertainty to perform three automated analyses: Ensemble Analysis: Repeat numerous model runs, each sampling from the parameter uncertainty, to generate a probability distribution of model projections. Allows us to put a confidence interval on the model Sensitivity Analysis: Repeats numerous model runs to assess how changes in model parameters will affect model outputs. Allows us to identify which parameters the model is most sensitive to. Uncertainty Analysis: Combines information about model sensitivity with information about parameter uncertainty to determine the contribution of each model parameter to the uncertainty in model outputs. Allow us to identify which parameters are driving model uncertainty. 5.4.1 Run Specification Return to the main menu for the PEcAn web interface: Click the “Start Over” button or navigate back to URL/pecan/ Repeat the steps for site selection and run specification from Demo 01, but also click on “Advanced setup”, then click Next. By clicking Advanced setup, PEcAn will first show an Analysis Menu, where we are going to specify new settings. For an ensemble analysis, increase the number of runs in the ensemble, in this case set Runs to 50. In practice you would want to use a larger ensemble size (100-5000) than we are using in the demo. The ensemble analysis samples parameters from their posterior distributions to propagate this uncertainty into the model output. PEcAn’s sensitivity analysis holds all parameters at their median value and then varies each parameter one-at-a-time based on the quantiles of the posterior distribution. PEcAn also includes a handy shortcut, which is the default behavior for the web interface, that converts a specified standard deviation into its Normal quantile equivalent (e.g. 1 and -1 are converted to 0.157 and 0.841). In this example set Sensitivity to -2,-1,1,2 (the median value, 0, occurs by default). We also can tell PEcAn which variable to run the sensitivity on. Here, set Variables to NEE, so we can compare against flux tower NEE observations. Click Next 5.4.2 Additional Outputs: The PEcAn workflow will take considerably longer to complete since we have just asked for over a hundred model runs. Once the runs are complete you will return to the output visualization page were there will be a few new outputs to explore, as well as outputs that were present earlier that we’ll explore in greater details: 5.4.2.1 Run ID: While the sensitivity and ensemble analyses synthesize across runs, you can also select individual runs from the Run ID menu. You can use the Graphs menu to visualize each individual run, or open individual runs in Shiny 5.4.2.2 Inputs: This menu shows the contents of /run which lets you look at and download: A summary file (README.txt) describing each run: location, run ID, model, dates, whether it was in the sensitivity or ensemble analysis, variables modified, etc. The model-specific input files fed into the model The jobs.sh file used to submit the model run 5.4.2.3 Outputs: This menu shows the contents of /out. A number of files generated by the underlying ecosystem model are archived and available for download. These include: Output files in the standardized netCDF ([year].nc) that can be downloaded for visualization and analysis (R, Matlab, ncview, panoply, etc) Raw model output in model-specific format (e.g. sipnet.out). Logfile.txt contains job.sh & model error, warning, and informational messages 5.4.2.4 PFTs: This menu shows the contents of /pft. There is a wide array of outputs available that are related to the process of estimating the model parameters and running sensitivity/uncertainty analyses for a specific Plant Functional Type. TRAITS: The Rdata files trait.data.Rdata and jagged.data.Rdata are, respectively, the available trait data extracted from the database that was used to estimate the model parameters and that same data cleaned and formatted for the statistical code. The list of variables that are queried is determined by what variables have priors associated with them in the definition of the PFTs. Priors are output into prior.distns.Rdata. Likewise, the list of species that are associated with a PFT determines what subset of data is extracted out of all data matching a given variable name. Demo 3 will demonstrate how a PFT can be created or modified. To look at these files in RStudio click on these files to load them into your workspace. You can further examine them in the Environment window or accessing them at the command line. For example, try typing names(trait.data) as this will tell you what variables were extracted, names(trait.data$Amax) will tell you the names of the columns in the Amax table, and summary(trait.data$Amax) will give you summary data about the Amax values. META-ANALYSIS: *.bug: The evaluation of the meta-analysis is done using a Bayesian statistical software package called JAGS that is called by the R code. For each trait, the R code will generate a [trait].model.bug file that is the JAGS code for the meta-analysis itself. This code is generated on the fly, with PEcAn adding or subtracting the site, treatment, and greenhouse terms depending upon the presence of these effects in the data itself. If the <random.effects> tag is set to FALSE then all random effects will be turned off even if there are multiple sites. meta-analysis.log contains a number of diagnostics, including the summary statistics of the model, an assessment of whether the posterior is consistent with the prior, and the status of the Gelman-Brooks-Rubin convergence statistic (which is ideally 1.0 but should be less than 1.1). ma.summaryplots.*.pdf are collections of diagnostic plots produced in R after the above JAGS code is run that are useful in assessing the statistical model. Open up one of these pdfs to evaluate the shape of the posterior distributions (they should generally be unimodal), the convergence of the MCMC chains (all chains should be mixing well from the same distribution), and the autocorrelation of the samples (should be low). traits.mcmc.Rdata contains the raw output from the statistical code. This includes samples from all of the parameters in the meta-analysis model, not just those that feed forward to the ecosystem, but also the variances, fixed effects, and random effects. post.distns.Rdata stores a simple tables of the posterior distributions for all model parameters in terms of the name of the distribution and its parameters. posteriors.pdf provides graphics showing, for each model parameter, the prior distribution, the data, the smoothed histogram of the posterior distribution (labeled post), and the best-fit analytical approximation to that smoothed histogram (labeled approx). Open posteriors.pdf and compare the posteriors to the priors and data SENSITIVITY ANALYSIS sensitivity.analysis.[RunID].[Variable].[StartYear].[EndYear].pdf shows the raw data points from univariate one-at-a-time analyses and spline fits through the points. Open this file to determine which parameters are most and least sensitive UNCERTAINTY ANALYSIS variance.decomposition.[RunID].[Variable].[StartYear].[EndYear].pdf, contains three columns, the coefficient of variation (normalized posterior variance), the elasticity (normalized sensitivity), and the partial standard deviation of each model parameter. Open this file for BOTH the soil and conifer PFTS and answer the following questions: The Variance Decomposition graph is sorted by the variable explaining the largest amount of variability in the model output (right hand column). From this graph identify the top-tier parameters that you would target for future constraint. A parameter can be important because it is highly sensitive, because it is highly uncertain, or both. Identify parameters in your output that meet each of these criteria. Additionally, identify parameters that are highly uncertain but unimportant (due to low sensitivity) and those that are highly sensitive but unimportant (due to low uncertainty). Parameter constraints could come from further literature synthesis, from direct measurement of the trait, or from data assimilation. Choose the parameter that you think provides the most efficient means of reducing model uncertainty and propose how you might best reduce uncertainty in this process. In making this choice remember that not all processes in models can be directly observed, and that the cost-per-sample for different measurements can vary tremendously (and thus the parameter you measure next is not always the one contributing the most to model variability). Also consider the role of parameter uncertainty versus model sensitivity in justifying your choice of what parameters to constrain. 5.4.2.5 PEcAn Files: This menu shows the contents of the root workflow folder that are not in one of the folders indicated above. It mostly contains log files from the PEcAn workflow that are useful if the workflow generates an error, and as metadata & provenance (a detailed record of how data was generated). STATUS gives a summary of the steps of the workflow, the time they took, and whether they were successful pecan.*.xml are PEcAn settings files workflow.R is the workflow script workflow.Rout is the corresponding log file samples.Rdata contains the parameter values used in the runs. This file contains two data objects, sa.samples and ensemble.samples, that are the parameter values for the sensitivity analysis and ensemble runs respectively sensitivity.output.[RunID].[Variable].[StartYear].[EndYear].Rdata contains the object sensitivity.output which is the model outputs corresponding to the parameter values in sa.samples. ENSEMBLE ANALYSIS ensemble.Rdata contains contains the object ensemble.output, which is the model predictions at the parameter values given in ensemble.samples. ensemble.analysis.[RunID].[Variable].[StarYear].[EndYear].pdf contains the ensemble prediction as both a histogram and a boxplot. ensemble.ts.[RunID].[Variable].[StartYear].[EndYear].pdf contains a time-series plot of the ensemble mean, median, and 95% CI 5.4.2.6 Global Sensitivity: Shiny Navigate to URL/shiny/global-sensitivity. This app uses the output from the ENSEMBLE runs to perform a global Monte Carlo sensitivity analysis. There are three modes controlled by Output type: Pairwise looks at the relationship between a specific parameter (X) and output (Y) All parameters looks at how all parameters affect a specific output (Y) All variables looks at how all outputs are affected by a specific parameter(X) In all of these analyses, the app also fits a linear regression to these scatterplots and reports a number of summary statistics. Among these, the slope is an indicator of global sensitivity and the R2 is an indicator of the contribution to global uncertainty 5.4.2.7 Next Steps The next set of tutorials will focus on the process of data assimilation and parameter estimation. The next two steps are in “.Rmd” files which can be viewed online. 5.4.2.8 Assimilation ‘by hand’ Explore how model error changes as a function of parameter value (i.e. data assimilation ‘by hand’) 5.4.2.9 MCMC Concepts Explore Bayesian MCMC concepts using the photosynthesis module 5.4.2.10 More info about tools, analyses, and specific tasks… Additional information about specific tasks (adding sites, models, data; software updates; etc.) and analyses (e.g. data assimilation) can be found in the PEcAn documentation If you encounter a problem with PEcAn that’s not covered in the documentation, or if PEcAn is missing functionality you need, please search known bugs and issues, submit a bug report, or ask a question in our chat room. Additional questions can be directed to the project manager "],["othervignettes.html", "5.5 Other Vignettes", " 5.5 Other Vignettes 5.5.1 Simple Model-Data Comparisons 5.5.1.1 Author: Istem Fer, Tess McCabe In this tutorial we will compare model outputs to data outside of the PEcAn web interface. The goal of this is to demonstrate how to perform additional analyses using PEcAn’s outputs. To do this you can download each of the Output files, and then perform the analyses using whatever software you prefer, or you can perform analyses directly on the PEcAn server itself. Here we’ll be analyzing model outputs in R using a browser-based version of RStudio that’s installed on the server 5.5.1.2 Starting RStudio Server Open RStudio Server in a new window at URL/rstudio The username is carya and the password is illinois. To open a new R script click File > New File > R Script Use the Files browser on the lower right pane to find where your run(s) are located All PEcAn outputs are stored in the output folder. Click on this to open it up. Within the outputs folder, there will be one folder for each workflow execution. For example, click to open the folder PEcAn_99000000001 if that’s your workflow ID A workflow folder will have a few log and settings files (e.g. pecan.xml) and the following subfolders run contains all the inputs for each run out contains all the outputs from each run pft contains the parameter information for each PFT Within both the run and out folders there will be one folder for each unique model run, where the folder name is the run ID. Click to open the out folder. For our simple case we only did one run so there should be only one folder (e.g. 99000000001). Click to open this folder. Within this folder you will find, among other things, files of the format .nc. Each of these files contains one year of model output in the standard PEcAn netCDF format. This is the model output that we will use to compare to data. 5.5.1.3 Read in settings From an XML file ## Read in the xml settings<-PEcAn.settings::read.settings("~/output/PEcAn_99000000001/pecan.CONFIGS.xml") ## To read in the model output runid<-as.character(read.table(paste(settings$outdir, "/run/","runs.txt", sep=""))[1,1]) # Note: if you are using an xml from a run with multiple ensembles this line will provide only the first run id outdir<- paste(settings$outdir,"/out/",runid,sep= "") start.year<-as.numeric(lubridate::year(settings$run$start.date)) end.year<-as.numeric(lubridate::year(settings$run$end.date)) site.id<-settings$run$site$id File_path<-"~/output/dbfiles/AmerifluxLBL_site_0-772/AMF_US-NR1_BASE_HH_9-1.csv" ## Open up a connection to The Bety Database con <- PEcAn.DB::db.open(settings$database$bety) 5.5.1.4 Read in model output from specific variables model_vars<-c("time", "NEE") #varibles being read model <- PEcAn.utils::read.output(runid,outdir,start.year, end.year, model_vars,dataframe=TRUE) The arguments to read.output are the run ID, the folder where the run is located, the start year, the end year, and the variables being read. The README file in the Input file dropdown menu of any successful run lists the run ID, the output folder, and the start and end year. 5.5.1.5 Compare model to flux observations First load up the observations and take a look at the contents of the file File_format<-PEcAn.DB::query.format.vars(bety = con, format.id = 5000000002) #This matches the file with a premade "format" or a template that describes how the information in the file is organized site<-PEcAn.DB::query.site(site.id, con) #This tells PEcAn where the data comes from observations<-PEcAn.benchmark::load_data(data.path = File_path, format= File_format, time.row = File_format$time.row, site = site, start_year = start.year, end_year = end.year) #This will throw an error that not all of the units can be converted. That's ok, as the units of the varibles of interest (NEE) are being converted. File_Path refers to where you stored your observational data. In this example the default file path is an Ameriflux dataset from Niwot Ridge. File_format queries the database for the format your file is in. The default format ID “5000000002” is for csv files downloaded from the Ameriflux website. You could query for diffent kinds of formats that exist in bety or make your own. Here 772 is the database site ID for Niwot Ridge Forest, which tells pecan where the data is from and what time zone to assign any time data read in. Second apply a conservative u* filter to observations observations$NEE[observations$UST<0.2]<-NA Third align model output and observations aligned_dat = PEcAn.benchmark::align_data(model.calc = model, obvs.calc = observations, var ="NEE", align_method = "match_timestep") When we aligned the data, we got a dataframe with the variables we requested in a \\(NEE.m\\) and a \\(NEE.o\\) format. The \\(.o\\) is for observations, and the \\(.m\\) is for model. The posix column allows for easy plotting along a timeseries. Fourth, plot model predictions vs. observations and compare this to a 1:1 line ## predicted vs observed plot plot(aligned_dat$NEE.m, aligned_dat$NEE.o) abline(0,1,col="red") ## intercept=0, slope=1 Fifth, calculate the Root Mean Square Error (RMSE) between the model and the data rmse = sqrt(mean((aligned_dat$NEE.m-aligned_dat$NEE.o)^2,na.rm = TRUE)) na.rm makes sure we don’t include missing or screened values in either time series. Finally, plot time-series of both the model and data together ## plot aligned data plot(aligned_dat$posix, aligned_dat$NEE.o, type="l") lines(aligned_dat$posix,aligned_dat$NEE.m, col = "red") Bonus How would you compare aggregated data? Try RMSE against monthly NEE instead of half-hourly. In this case, first average the values up to monthly in the observations. Then, use align_data to match the monthly timestep in model output. NOTE: Align_data uses two seperate alignment function, match_timestep and mean_over_larger_timestep. Match_timestep will use only that data that is present in both the model and the observation. This is helpful for sparse observations. Mean_over_larger_timestep aggregates the values over the largest timestep present. If you were to look at averaged monthly data, you would use mean_over_larger_timestep. monthlyNEEobs<-aggregate(observations, by= list(month(observations$posix)), simplify=TRUE, FUN =mean, na.rm= TRUE) plottable<-align_data(model.calc = model, obvs.calc = monthlyNEEobs, align_method = "mean_over_larger_timestep", var= "NEE") head(plottable) 5.5.2 Data Assimilation Concepts The goal of this tutorial is to help you gain some hands-on familiarity with some of the concepts, tools, and techniques involved in Bayesian Calibration. As a warm-up to more advanced approaches to model-data fusion involving full ecosystem models, this example will focus on fitting the Farquhar, von Caemmerer, and Berry (1980) photosynthesis model [FvCB model] to leaf-level photosynthetic data. This is a simple, nonlinear model consisting of 3 equations that models net carbon assimilation, \\(A^{(m)}\\), at the scale of an individual leaf as a function of light and CO2. \\[A_j = \\frac{\\alpha Q}{\\sqrt{1+(\\alpha^2 Q^2)/(Jmax^2)}} \\frac{C_i- \\Gamma}{4 C_i + 8 \\Gamma}\\] \\[A_c = V_{cmax} \\frac{C_i - \\Gamma}{C_i+ K_C (1+[O]/K_O) }\\] \\[A^{(m)} = min(A_j,A_c) - r\\] The first equation \\(A_j\\) describes the RUBP-regeneration limited case. In this equation the first fraction is a nonrectangular hyperbola predicting \\(J\\), the electron transport rate, as a function of incident light \\(Q\\), quantum yield \\(\\alpha\\), and the assymptotic saturation of \\(J\\) at high light \\(J_{max}\\). The second equation, \\(A_c\\), describes the Rubisco limited case. The third equation says that the overall net assimilation is determined by whichever of the two above cases is limiting, minus the leaf respiration rate, \\(r\\). To keep things simple, as a Data Model (a.k.a. Likelihood or Cost Function) we’ll assume that the observed leaf-level assimilation \\(A^{(o)}\\) is Normally distributed around the model predictions with observation error \\(\\tau\\). \\[A^{(o)} \\sim N(A^{(m)},\\tau)\\] To fit this model to data we’re going to rely on a piece of statistical software known as JAGS. The above model would be written in JAGS as: model{ ## Priors Jmax ~ dlnorm(4.7,2.7) ## maximum electron transport rate prior alpha~dnorm(0.25,100) ##quantum yield (mol electrons/mole photon) prior vmax ~dlnorm(4.6,2.7) ## maximum rubisco capacity prior r ~ dlnorm(0.75,1.56) ## leaf respiration prior cp ~ dlnorm(1.9,2.7) ## CO2 compensation point prior tau ~ dgamma(0.1,0.1) for(i in 1:n){ ## electron transport limited Aj[i]<-(alpha*q[i]/(sqrt(1+(alpha*alpha*q[i]*q[i])/(Jmax*Jmax))))*(pi[i]-cp)/(4*pi[i]+8*cp) ## maximum rubisco limited without covariates Ac[i]<- vmax*(pi[i]-cp)/(pi[i]+Kc*(1+po/Ko)) Am[i]<-min(Aj[i], Ac[i]) - r ## predicted net photosynthesis Ao[i]~dnorm(Am[i],tau) ## likelihood } } The first chunk of code defines the prior probability distributions. In Bayesian inference every unknown parameter that needs to be estimated is required to have a prior distribution. Priors are the expression of our belief about what values a parameter might take on prior to observing the data. They can arise from many sources of information (literature survey, meta-analysis, expert opinion, etc.) provided that they do not make use of the data that is being used to fit the model. In this particular case, the priors were defined by Feng and Dietze 2013. Most priors are lognormal or gamma, which were choosen because most of these parameters need to be positive. After the priors is the Data Model, which in JAGS needs to be implemented as a loop over every observation. This is simply a codified version of the earlier equations. Table 1: FvCB model parameters in the statistical code, their symbols in equations, and definitions Parameter Symbol Definition alpha0 \\(\\alpha\\) quantum yield (mol electrons/mole photon) Jmax \\(J_{max}\\) maximum electron transport cp \\(\\Gamma\\) CO2 compensation point vmax0 \\(V_{cmax}\\) maximum Rubisco capacity (a.k.a Vcmax) r \\(R_d\\) leaf respiration tau \\(\\tau\\) residual precision q \\(Q\\) PAR pi \\(C_i\\) CO2 concentration 5.5.2.1 Fitting the model To begin with we’ll load up an example A-Ci and A-Q curve that was collected during the 2012 edition of the Flux Course at Niwot Ridge. The exact syntax below may be a bit confusing to those unaccustomed to R, but the essence is that the filenames line is looking up where the example data is stored in the PEcAn.photosynthesis package and the dat line is loading up two files (once A-Ci, the other A-Q) and concatanating them together. library(PEcAn.photosynthesis) ### Load built in data filenames <- system.file("extdata", paste0("flux-course-3",c("aci","aq")), package = "PEcAn.photosynthesis") dat<-do.call("rbind", lapply(filenames, read_Licor)) ## Simple plots aci = as.character(dat$fname) == basename(filenames[1]) plot(dat$Ci[aci],dat$Photo[aci],main="ACi") plot(dat$PARi[!aci],dat$Photo[!aci],main="AQ") In PEcAn we’ve written a wrapper function, \\(fitA\\), around the statistical model discussed above, which has a number of other bells and whistles discussed in the PEcAn Photosynthesis Vignette. For today we’ll just use the most basic version, which takes as arguments the data and the number of MCMC iterations we want to run. fit <- fitA(dat,model=list(n.iter=10000)) 5.5.2.2 What’s going on Bayesian numerical methods for model calibration are based on sampling parameter values from the posterior distribution. Fundamentally what’s returned is a matrix, with the number of iterations as rows and the number of parameters as columns, which are samples from the posterior distribution, from which we can approximate any quantity of interest (mean, median, variance, CI, etc.). The following plots follow the trajectory of two correlated parameters, Jmax and alpha. In the first figure, arrows show the first 10 iterations. Internally JAGS is choosing between a variety of different Bayesian sampling methods (e.g. Metropolis-Hasting, Gibbs sampling, slice sampling, rejection sampling, etc) to draw a new value for each parameter conditional on the current value. After just 10 steps we don’t have a good picture of the overall posterior, but it should still be clear that the sampling is not a complete random walk. params <- as.matrix(fit$params) xrng = range(fit$params[,"alpha0"]) yrng = range(fit$params[,"Jmax0"]) n = 1:10 plot(params[n,"alpha0"],params[n,"Jmax0"],type='n',xlim=xrng,ylim=yrng) arrows(params[n[-10],"alpha0"],params[n[-10],"Jmax0"],params[n[-1],"alpha0"],params[n[-1],"Jmax0"],length=0.125,lwd=1.1) After 100 steps, we can see a cloud start to form, with occassional wanderings around the periphery. n = 1:100 plot(params[n,"alpha0"],params[n,"Jmax0"],type='l',xlim=xrng,ylim=yrng) After \\(nrow(params)\\) steps what we see is a point cloud of samples from the joint posterior distribution. When viewed sequentially, points are not independent, but we are interested in working with the overall distribution, where the order of samples is not important. n = 1:nrow(params) plot(params[n,"alpha0"],params[n,"Jmax0"],type='p',pch="+",cex=0.5,xlim=xrng,ylim=yrng) 5.5.2.3 Evaluating the model output A really important aspect of Bayesian inference is that the output is the joint posterior probability of all the parameters. This is very different from an optimization approach, which tries to find a single best parameter value. It is also different from estimating the independent posterior probabilities of each parameter – Bayesian posteriors frequently have strong correlation among parameters for reasons having to do both with model structure and the underlying data. The model we’re fitting has six free parameters, and therefore the output matrix has 6 columns, of which we’ve only looked at two. Unfortunately it is impossible to visualize a 6 dimensional parameter space on a two dimensional screen, so a very common practice (for models with a small to medium number of parameters) is to look at all pairwise scatterplots. If parameters are uncorrelated we will typically see oval shaped clouds that are oriented in the same directions as the axes. For parameters with linear correlations those clouds will be along a diagonal. For parameters with nonlinear trade-offs the shapes of the parameter clouds can be more complex, such as the banana-shaped or triangular. For the FvCB model we see very few parameters that are uncorrelated or have simple linear correlations, a fact that we should keep in mind when interpreting individual parameters. pairs(params,pch=".") The three most common outputs that are performed on almost all Bayesian analyses are to look at the MCMC chains themselves, the marginal distributions of each parameter, and the overall summary statistics. The ‘trace’ diagrams below show the history of individual parameters during the MCMC sampling. There are different color lines that represent the fact that JAGS ran the MCMC multiple times, with each run (i.e. each color) being refered to as a different \\(chain\\). It is common to run multiple chains in order to assess whether the model, started from different points, consistently converges on the same answer. The ideal trace plot looks like white noise with all chains in agreement. The ‘density’ figures represent smoothed versions of the marginal distributions of each parameter. The tick marks on the x-axis are the actual samples. You will note that some posteriors will look approximately Normal, while others may be skewed or have clearly defined boundaries. On occassion there will even be posteriors that are multimodal. There is no assumption built into Bayesian statistics that the posteriors need be Normal, so as long as an MCMC has converged this diversity of shapes is valid. [note: the most common cause of multi-modal posteriors is a lack of convergence] Finally, the summary table reports, for each parameter, a mean, standard deviation, two variants of standard error, and standard quantile estimates (95% CI, interquartile, and median). The standard deviation of the posterior is a good summary statistic about how uncertain we are about a parameter. The Naive SE is the traditonal \\(\\frac{SD}{\\sqrt{n}}\\), which is an estimate of the NUMERICAL accuracy in our estimate of the mean. As we run the MCMC longer (i.e. take more samples), we get an answer that is numerically more precise (SE converges to 0) but the uncertainty in the parameter (i.e. SD) stays the same because that’s determined by the sample size of the DATA not the length of the MCMC. Finally, the Time-series SE is a variant of the SE calculation that accounts for the autocorrelation in the MCMC samples. In practice is is therefore more appropriate to use this term to assess numerical accuracy. plot(fit$params,auto.layout = FALSE) ## MCMC diagnostic plots summary(fit$params) ## parameter estimates Assessing the convergence of the MCMC is first done visually, but more generally the use of statistical diagnostics to assess convergence is highly encouraged. There are a number of metrics in the literature, but the most common is the Gelman-Brooks-Rubin statistic, which compare the variance within each chain to the variance across chains. If the chains have converged then this quantity should be 1. Values less than 1.05 are typically considered sufficient by most statisticians, but these are just rules-of-thumb. gelman.plot(fit$params,auto.layout = FALSE) gelman.diag(fit$params) As with any modeling, whether statistical or process-based, another common diagnostic is a predicted vs observed plot. In a perfect model the data would fall along the 1:1 line. The deviations away from this line are the model residuals. If observations lie along a line other than the 1:1 this indicates that the model is biased in some way. This bias is often assessed by fitting a linear regression to the points, though two important things are noteworthy about this practice. First, the \\(R^2\\) and residual error of this regression are not the appropriate statistics to use to assess model performance (though you will frequently find them reported incorrectly in the literature). The correct \\(R^2\\) and residual error (a.k.a Root Mean Square Error, RMSE) are based on deviations from the 1:1 line, not the regression. The code below shows these two terms calculated by hand. The second thing to note about the regression line is that the standard regression F-test, which assesses deviations from 0, is not the test you are actually interested in, which is whether the line differs from 1:1. Therefore, while the test on the intercept is correct, as this value should be 0 in an unbiased model, the test statistic on the slope is typically of less interest (unless your question really is about whether the model is doing better than random). However, this form of bias can easily be assessed by looking to see if the CI for the slope overlaps with 1. ## predicted vs observed plot par(mfrow=c(1,1)) mstats = summary(fit$predict) pmean = mstats$statistics[grep("pmean",rownames(mstats$statistics)),1] plot(pmean,dat$Photo,pch="+",xlab="Predicted A",ylab = "Observed A") abline(0,1,col=2,lwd=2) bias.fit = lm(dat$Photo~pmean) abline(bias.fit,col=3,lty=2,lwd=2) legend("topleft",legend=c("1:1","regression"),lwd=2,col=2:3,lty=1:2) summary(bias.fit) RMSE = sqrt(mean((pmean-dat$Photo)^2)) RMSE R2 = 1-RMSE^2/stats::var(dat$Photo) R2 confint(bias.fit) In the final set of plots we look at the actual A-Ci and A-Q curves themselves. Here we’ve added two interval estimates around the curves. The CI captures the uncertainty in the parameters and will asympotically shrink with more and more data. The PI (predictive interval) includes the parameter and residual error. If our fit is good then 95% PI should thus encompass at least 95% of the observations. That said, as with any statistical model we want to look for systematic deviations in the residuals from either the mean or the range of the PI. ## Response curve plot_photo(dat,fit) Note: on the last figure you will get warnings about “No ACi” and “No AQ” which can be ignored. These are occuring because the file that had the ACi curve didn’t have an AQ curve, and the file that had the AQ curve didn’t have an ACi curve. 5.5.2.4 Additional information There is a more detailed R Vignette on the use of the PEcAn photosynthesis module available in the PEcAn Repository. 5.5.2.5 Citations Dietze, M.C. (2014). Gaps in knowledge and data driving uncertainty in models of photosynthesis. Photosynth. Res., 19, 3–14. Farquhar, G., Caemmerer, S. & Berry, J.A. (1980). A biochemical model of photosynthetic CO2 assimilation in leaves of C3 species. Planta, 149, 78–90. Feng, X. & Dietze, M.C. (2013). Scale dependence in the effects of leaf ecophysiological traits on photosynthesis: Bayesian parameterization of photosynthesis models. New Phytol., 200, 1132–1144. 5.5.3 Parameter Data Assimilation 5.5.3.1 Objectives Gain hands-on experience in using Bayesian MCMC techniques to calibrate a simple ecosystem model using parameter data assimilation (PDA) Set up and run a PDA in PEcAn using model emulation technique, assimilating NEE data from Niwot Ridge Examine outputs from a PDA for the SIPNET model and evaluation of the calibrated model against i) data used to constrain model, ii) additional data for the same site 5.5.3.2 Larger Context Parameter data assimilation (PDA) occurs as one step in the larger process of model calibration, validation, and application. The goal of PDA is to update our estimates of the posterior distributions of the model parameters using data that correspond to model outputs. This differs from our previous use of PEcAn to constrain a simple model using data that map directly to the model parameters. Briefly, the recommended overall approach for model calibration and validation consists of the following steps: Assemble and process data sets required by the model as drivers Perform an initial test-run of the model as a basic sanity check Were there errors in drivers? (return to 1) Is the model in the same ballpark as the data? Construct priors for model parameters Collect/assemble the data that can be used to constrain model parameters and outputs Meta-analysis Sensitivity analysis (SA) Variance Decomposition (VD) Determine what parameters need further constraint Does this data exist in the literature? (repeat 4-8) Can I collect this data in the field? (repeat 4-8) Ensemble Analysis Is reality within the range of the uncertainty in the model? Evaluate/estimate uncertainties in the data Parameter Data Assimilation: Propose new parameter values Evaluate L(data | param) & prior(param) Accept or reject the proposed parameter values Repeat many times until a histogram of accepted parameter values approximates the true posterior distribution. Model evaluation [preferably ensemble based] Against data used to constrain model Against additional data for this site Same variable, different time Different variables Against data at a new site Do I need more data? Repeat 4-9 (direct data constraint) or 6-11 (parameter data assimilation). Application [preferably ensemble forecast or hindcast] 5.5.3.3 Connect to Rstudio Today, we’re again going to work mostly in Rstudio, in order to easily edit advanced PEcAn settings and browse files. So if you haven’t already, connect now to the Rstudio server on your VM ([URL]/rstudio). This tutorial assumes you have successfully completed an ensemble and a sensitivity analysis (Demo 2) before. 5.5.3.4 Defining variables The following variables need to be set specific to the site being run and the workflow being run workflow_id <- 99000000002 ## comes from the History table, your successful ensemble run's workflow ID ## from URL/bety/inputs. ## Search by Ameriflux ID (e.g. US-NR1) ## Find the "plain" site record (no CF or model name) that's on your server ## (probably last in the list) ## Click on the magnifying glass icon then look under "View Related Files" datadir <- "/home/carya/output/dbfiles/AmerifluxLBL_site_0-772/" ## where PEcAn is saving output (default OK on VM) outdir <- "/home/carya/output/" 5.5.3.5 Initial Ensemble Analysis A good place to start when thinking about a new PDA analysis is to look at the current model fit to observed data. In fact, we want to compare data to a full ensemble prediction from the model. This is important because our current parameter distributions will be the priors for PDA. While the analysis will translate these priors into more optimal (in terms of producing model output that matches observations) and more confident (i.e. narrower) posterior distributions, these results are inherently constrained by the current parameter distributions. Thus, if reality falls far outside the prior ensemble confidence interval (which reflects the current uncertainty of all model parameters), data assimilation will not be able to fix this. In such cases, the prior parameter estimates must already be over-constrained, or there are structural errors in the model itself that need fixing. To begin, let’s load up some NEE observations so that we can plot them along with our ensemble predictions. In the code below the elements in bold may vary depending on site and your previous runs. library(PEcAn.all) # read settings settings <- read.settings(file.path(outdir,paste0("PEcAn_",workflow_id),"pecan.CONFIGS.xml")) # open up a DB connection con <- PEcAn.DB::db.open(settings$database$bety) # Fill out the arguments needed by load_data function # read file format information format <- PEcAn.DB::query.format.vars(bety = con, format.id = 5000000002) start_year <- lubridate::year(settings$run$start.date) end_year <- lubridate::year(settings$run$end.date) vars.used.index <- which(format$vars$bety_name %in% c("NEE", "UST")) obs <-PEcAn.benchmark::load_data(data.path = file.path(datadir, "AMF_US-NR1_BASE_HH_9-1.csv"), format = format, start_year = start_year, end_year = end_year, site = settings$run$site, vars.used.index = vars.used.index, time.row = format$time.row) obs$NEE[obs$UST<0.4] <- NA ## U* filter NEEo <- obs$NEE Now let’s load up our ensemble outputs from the previous ensemble analysis (Demo 2) and plot our ensemble predictions against our NEE observations. # load outputs, try not to delete prePDA ensemble output filename from your environment prePDA_ensemble_output_file <- file.path(outdir,paste0("PEcAn_",workflow_id, "/ensemble.ts.", settings$ensemble$ensemble.id, ".NEE.2003.2006.Rdata")) load(prePDA_ensemble_output_file) # calculate CI pre_pda_ens <- ensemble.ts[["NEE"]] preCI <- apply(pre_pda_ens, 2, quantile, c(0.025, 0.5, 0.975), na.rm = TRUE) # plot model ensemble ymin <- min(min(c(preCI, NEEo), na.rm = TRUE)) ymax <- max(max(c(preCI, NEEo), na.rm = TRUE)) plot(preCI[2,], ylim = c(ymin, ymax), lwd = 2, xlab = "time", ylab = "NEE", main = "pre-PDA model ensemble vs data", type = "n") prepoly <- 1:dim(preCI)[2] polygon(c(prepoly, rev(prepoly)), c(preCI[3,], rev(preCI[1,])), col='khaki', border=NA) # add data points(NEEo, pch = ".", col= adjustcolor("purple",alpha.f=0.5)) legend("topright", legend=c("Data","Pre-PDA Model"), pch=c(15,15), col=c("purple","khaki")) When interpreting your results it is important to remember the difference between a confidence interval, which just includes parameter uncertainties, and a predictive interval, which includes parameter and residual uncertainties. Your ensemble analysis plot illustrates the former—i.e., the confidence in the mean NEE. By contrast, the data reflect both changes in mean NEE, and random variability. As such, we can’t expect all the data to fall within the CI; in fact, if we had unlimited data to constrain mean NEE, the CI would collapse to a single line and none of the data would be contained! However, your plot will give you an idea of how much uncertainty there is in your model currently, and help to identify systematic errors like bias (values consistently too high or low) or poorly represented seasonal patterns. 5.5.3.6 Questions: Does your ensemble agree well with the data? If so, how much room for improvement is there, in terms of tightening the CI? If not, what are the greatest discrepancies? What are some of the problems (with model, data, and/or PEcAn) that might explain the data-model disparity you see? 5.5.3.7 Choosing Parameters Beyond exploratory exercises, the first step of PDA analysis is to choose the model parameters you will target for optimization. PDA is computationally expensive (even when using an emulator), and the cost increases exponentially with the number of parameters targeted. The number you can handle in any given analysis completely depends on the complexity of the model and your available computational resources, but in practice it’s going to be rather small (~1–10) relative to the large number of parameters in a mechanistic ecosystem model (~10–100). Given this limitation, it is important to target parameters that can contribute substantially to improving model fit. If you recall, identifying those parameters was the goal of the uncertainty analysis you conducted previously, in the second PEcAn demo. Let’s revisit that analysis now. Open your variance decomposition graph from Demo 2 From this figure decide which variables you will target with PDA. As noted, an obvious criterion is that the parameter should be contributing a large amount of uncertainty to the current model, because otherwise it simply can’t change the model output much no matter how much you try to optimize it. But there are other considerations too. For example, if two parameters have similar or competing roles in the model, you may have trouble optimizing both simultaneously. In practice, there will likely be some guess-and-testing involved, though a good understanding of how the model works will help. It may also help to look at the shape of the Sensitivity responses and details of model fit to data (your ensemble analysis from the previous section). For the purposes of this demo, choose eight to ten parameters (in total, if you have more than one PFT) that contribute high uncertainty to model output and/or seem like good choices for some other rational reason. 5.5.3.8 Questions: Which parameters did you choose, and why? 5.5.3.9 Editing PEcAn settings Now let’s add settings to tell PEcAn how to run the PDA with emulator, we will come to the details of model emulation later. Open up the pecan.CONFIGS.xml file you located previously, and choose File > Save as... from the menu to save a new copy as pecan.PDA.xml. Now add the block of XML listed below to the file, immediately after the line. Check and fill in the parts corresponding to your run when necessary. In this block, use the <param.names><param> tags to identify the parameters you’ve chosen for PDA (it’s up to you to choose the number of parameters you want to constrain, then you can set the <n.knot> to be >= 10 per parameter you choose, e.g. 200 knots for 10 parameters). Here, you need to use PEcAn’s standard parameter names, which are generally not the same as what’s printed on your variance decomposition graph. To find your parameters look at the row names in the prior.distns.csv file for each PFT under the PFT pulldown menu. Insert the variable name (exactly, and case sensitive) into the <param> tags of the XML code below. In addition, you may need to edit <inputs><file><path>, depending on the site and year you ran previously. The rest of the settings control options for the PDA analysis (how long to run, etc.), and also identify the data to be used for assimilation. For more details, see the assim.batch vignette on the PEcAn GitHub page (https://goo.gl/9hYVPQ). <?xml version="1.0"?> <-- These lines are already in there. Don't duplicate them, <pecan> <-- just paste the <assim.batch> block below right after them. <assim.batch> <method>emulator</method> <n.knot>160</n.knot> <-- FILL IN <iter>25000</iter> <chain>3</chain> <param.names> <soil> <param>YOUR_PFT_1_PARAM_1</param> <-- FILL IN <param>YOUR_PFT_1_PARAM_2</param> <-- FILL IN </soil> <temperate.coniferous> <param>YOUR_PFT_2_PARAM_1</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_2</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_3</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_4</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_5</param> <-- FILL IN <param>YOUR_PFT_2_PARAM_6</param> <-- FILL IN </temperate.coniferous> </param.names> <jump> <adapt>100</adapt> <adj.min>0.1</adj.min> <ar.target>0.3</ar.target> </jump> <inputs> <file> <path> <path>/home/carya/output/dbfiles/AmerifluxLBL_site_0-772/AMF_US-NR1_BASE_HH_9-1.csv</path> </path> <format>5000000002</format> <input.id>1000011238</input.id> <-- FILL IN, from BETY inputs table, this is *NOT* the workflow ID <likelihood>Laplace</likelihood> <variable.name> <variable.name>NEE</variable.name> <variable.name>UST</variable.name> </variable.name> <variable.id>297</variable.id> </file> </inputs> </assim.batch> Once you’ve made and saved the changes to your XML, load the file and check that it contains the new settings: settings <- read.settings(file.path(outdir,paste0("PEcAn_",workflow_id),"pecan.PDA.xml")) settings$assim.batch If the printed list contains everything you just added to pecan.PDA.xml, you’re ready to proceed. 5.5.3.10 Investigating PEcAn function pda.emulator (optional) Before we run the data assimilation, let’s take a high-level look at the organization of the code. Use the Rstudio file browser to open up ~/pecan/modules/assim.batch/R/pda.emulator.R. This code works in much the same way as the pure statistical models that we learned about earlier in the week, except that the model being fit is a statistical model that emulates a complicated process-based computer simulation (i.e., an ecosystem model). We could have directly used the ecosystem model (indeed PEcAn’s other PDA functions perform MCMC by actually running the ecosystem model at each iteration, see pda.mcmc.R script as an example), however, this would require a lot more computational time than we have today. Instead here we will use a technique called model emulation. This technique allows us to run the model for a relatively smaller number of times with parameter values that have been carefully chosen to give a good coverage of parameter space. Then we can interpolate the likelihood calculated for each of those runs to get a surface that “emulates” the true likelihood and perform regular MCMC, except instead of actually running the model on every iteration to get a likelihood, this time we will just get an approximation from the likelihood emulator. The general algorithm of this method can be further expressed as: Propose initial parameter set sampling design Run full model for each parameter set Evaluate the likelihoods Construct emulator of multivariate likelihood surface Use emulator to estimate posterior parameter distributions (Optional) Refine emulator by proposing new design points, goto 2) For now, we just want you to get a glimpse at the overall structure of the code, which is laid out in the comment headers in pda.emulator(). Most of the real work gets done by the functions this code calls, which are all located in the file ~/pecan/modules/assim.batch/R/pda.utils.R and the MCMC will be performed by the mcmc.GP() function in ~/pecan/modules/emulator/R/minimize.GP.R. To delve deeper into how the code works, take a look at these files when you have the time. 5.5.3.11 Running a demo PDA Now we can go ahead and run a data assimilation MCMC with emulator. Since you’ve already loaded the settings containing your modified <assim.batch> XML block, all you need to do to start the PDA is run pda.emulator(settings). But, in the emulator workflow, there is a bit of a time consuming step where we calculate the effective sample size of the input data, and we have already done this step for you. You could load it up and pass it to the function explicitly in order to skip this step: # load input data load("/home/carya/pda/pda_externals.Rdata") postPDA.settings <- pda.emulator(settings, external.data = inputs_w_neff) After executing the code above, you will see print-outs to the console. The code begins with loading the prior values which in this case are the posterior distributions coming from your previous meta analysis. Then, normally, it loads the observational data and carries out necessary conversions and formatting to align it with model outputs, as we did separately above, but today it will skip this step as we are passing data externally. After this step, you will see a progress bar where the actual model is run n.knot times with the proposed parameter sets and then the outputs from these runs are read. Next, this model output is compared to the specified observational data, and the likelihood is calculated using the heteroskedastic Laplacian discussed previously. Once we calculate the likelihoods, we fit an emulator which interpolates the model output in parameter space between the points where the model has actually been run. Now we can put this emulator in the MCMC algorithm instead of the model itself. Within the MCMC loop the code proposes new parameter value from a multivariate normal jump distribution. The corresponding likelihood will be approximated by the emulator and the new parameter value is accepted or rejected based on its posterior probability relative to the current value. 5.5.3.12 Outputs from PEcAn’s Parameter Data Assimilation When the PDA is finished, a number of outputs are automatically produced that are either the same as or similar to posterior outputs that we’ve seen before. These are located in the PEcAn_[workflow_id]/pft/* output directory and are identified by pda.[PFT]_[workflow_id] in the filenames: posteriors.pda.[PFT]*.pdf shows the posterior distributions resulting from your PDA trait.mcmc.pda.[PFT]*.Rdata contains all the parameter samples contained in the PDA posterior mcmc.pda.[PFT]*.Rdata is essentially the same thing in a different format mcmc.diagnostics.pda.[PFT]*.pdf shows trace plots and posterior densities for each assimilated parameter, as well as pairs plots showing all pairwise parameter correlations. Together, these files allow you to evaluate whether a completed PDA analysis has converged and how the posterior distributions compare to the priors, and to use the posterior samples in further analyses, including additional PDA. If you haven’t done so already, take a look at all of the outputs described here. 5.5.3.13 Questions: Do the diagnostic figures indicate that your likelihood at least improved over the course of the analysis? Does the MCMC appear to have converged? Are the posterior distributions well resolved? 5.5.3.14 Post-PDA analyses In addition to the outputs of the PDA itself, you may want to conduct ensemble and/or sensitivity analyses based on the posteriors of the data assimilation, in order to check progress towards improved model fit and/or changing sensitivity. For this, you need to generate new model runs based on parameters sampled from the updated (by PDA) posterior, which is a simple matter of rerunning several steps of the PEcAn workflow. The PDA you ran has automatically produced an updated XML file (pecan.pda***.xml) that includes the posterior id to be used in the next round of runs. Locate this file in your run directory and load the file for the post-pda ensemble/sensitivity analysis (if you already have the settings list in your working environment you don’t need to re-read the settings): # read post-PDA settings if you don't have them in your wotking environment # replace the *** with the ensemble id given by the workflow # postPDA.settings <- read.settings(file.path(outdir,paste0("PEcAn_", workflow_id),"pecan.pda***.xml")) # Call model specific write.configs postPDA.settings <- run.write.configs(postPDA.settings, write=postPDA.settings$database$bety$write, ens.sample.method=postPDA.settings$ensemble$method) # Let's save the settings with the new ensemble id PEcAn.settings::write.settings(settings, outputfile=paste0('pecan.pda', postPDA.settings$assim.batch$ensemble.id,'.xml')) # Start ecosystem model runs, this one takes awhile... PEcAn.workflow::start_model_runs(postPDA.settings, postPDA.settings$database$bety$write) # Get results of model runs get.results(postPDA.settings) # Repeat ensemble analysis with PDA-constrained params run.ensemble.analysis(postPDA.settings, TRUE) # let's re-load the pre-PDA ensemble outputs load(prePDA_ensemble_output_file) pre_pda_ens <- ensemble.ts[["NEE"]] # nowload the post-PDA ensemble outputs postPDA_ensemble_output_file <- file.path(outdir,paste0("PEcAn_", workflow_id, "/ensemble.ts.", postPDA.settings$ensemble$ensemble.id, ".NEE.2003.2006.Rdata")) load(postPDA_ensemble_output_file) post_pda_ens <- ensemble.ts[["NEE"]] # try changing the window value for daily, weekly, monthly smoothing later # see if this changes your model-data agreement, why? window <- 1 # no smoothing pre_pda <- t(apply(pre_pda_ens, 1, function(x) { tapply(x, rep(1:(length(x)/window + 1), each = window)[1:length(x)], mean, na.rm = TRUE)})) post_pda <- t(apply(post_pda_ens, 1, function(x) { tapply(x, rep(1:(length(x)/window + 1), each = window)[1:length(x)], mean, na.rm = TRUE)})) fobs <- tapply(NEEo, rep(1:(length(NEEo) / window + 1), each = window)[1:length(NEEo)], mean, na.rm = TRUE) # save the comparison plots to pdf pdf(file.path(outdir,paste0("PEcAn_",workflow_id),"model.data.comparison.pdf"), onefile=T, paper='A4r', height=15, width=20) # now plot the pre-PDA ensemble similar to the way we did before preCI <- apply(pre_pda, 2, quantile, c(0.025, 0.5, 0.975), na.rm = TRUE) ymin <- min(min(c(preCI, fobs), na.rm = TRUE)) ymax <- max(max(c(preCI, fobs), na.rm = TRUE)) plot(pre_pda[1,], ylim = c(ymin, ymax), lwd = 2, xlab = "time", ylab = "NEE", main = "pre-PDA vs post-PDA", type = "n") prepoly <- 1:dim(preCI)[2] polygon(c(prepoly, rev(prepoly)),c(preCI[3,], rev(preCI[1,])),col='khaki',border=NA) # plot the post-PDA ensemble postCI <- apply(post_pda, 2, quantile, c(0.025, 0.5, 0.975), na.rm = TRUE) postpoly <- 1:dim(postCI)[2] polygon(c(postpoly, rev(postpoly)),c(postCI[3,], rev(postCI[1,])),col='lightblue',border=NA) # finally let's add the data and see how we did points(fobs, pch = ".", col= adjustcolor("purple",alpha.f=0.7)) legend("topright", legend=c("Data","Pre-PDA Model", "Post-PDA Model"), pch=c(15,15,15), col=c("purple","khaki","lightblue")) dev.off() # Repeat variance decomposition to see how constraints have changed run.sensitivity.analysis(postPDA.settings) Now you can check the new figures produced by your analyses under PEcAn_[workflow_id]/pft/*/variance.decomposition.*.pdf and PEcAn_[workflow_id]/pft/*/sensitivity.analysis.*.pdf, and compare them to the previous ones. Also, take a look at the comparison of model outputs to data when we run SIPNET with pre- and post-PDA parameter (mean) values under PEcAn_[workflow_id]/model.data.comparison.pdf. 5.5.3.15 Questions: Looking at the ensemble analysis outputs in order (i.e., in order of increasing ID in the filenames), qualitatively how did the model fit to data change over the course of the analysis? Based on the final ensemble analysis, what are the major remaining discrepancies between model and data? Can you think of the processes / parameters that are likely contributing to the differences? What would be your next steps towards evaluating or improving model performance? 5.5.4 State-Variable Data Assimilation 5.5.4.1 Objectives: Assimilate tree ring estimated NPP & inventory AGB within the SIPNET model in order to: Reconcile data and model NPP & AGB estimates Constrain inferences about other ecosystem responses. 5.5.4.2 Overview: Initial Run Settings Load and match plot and increment data Estimate tree-level data uncertainties Estimate allometric relationships Estimate stand-level NPP Sample initial conditions and parameters Run Ensemble Kalman Filter 5.5.4.3 Initial Run Perform a site-level SIPNET run using the following settings Site = UNDERC Start = 01/01/1979 End = 12/31/2015 Met = NARR Check Brown Dog When the run is complete, open the pecan.xml and cut-and-paste the outdir for later use 5.5.4.4 Settings: Open the PEcAn RStudio environment back up. Set your working directory to the outdir from above setwd(outdir) and shift the file browser to that location (Files > More > Go To Working Directory) Open up the latest settings file pecan.CONFIG.xml. At the top of the file add the following tags to set the ensemble size <state.data.assimilation> <n.ensemble>35</n.ensemble> <process.variance>FALSE</process.variance> <sample.parameters>TRUE</sample.parameters> <data> <format_id>1000000040</format_id> <input.id>1000013298</input.id> </data> <state.variables> <variable> <variable.name>NPP</variable.name> <unit>MgC/ha/yr</unit> <min_value>-9999</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>AbvGrndWood</variable.name> <unit>KgC/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>KgC/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>LeafC</variable.name> <unit>m^2/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SoilMoistFrac</variable.name> <unit></unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SWE</variable.name> <unit>cm</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>Litter</variable.name> <unit>gC/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> </state.variables> <forecast.time.step>year</forecast.time.step> <start.date>1980/01/01</start.date> <end.date>2015/12/31</end.date> </state.data.assimilation> Delete the <pfts> block from the settings In the PEcAn History, go to your PDA run and open pecan.pda[UNIQUEID].xml (the one PEcAn saved for you AFTER you finished the PDA) Cut-and-paste the PDA <pfts> block into the SDA settings file Save the file as pecan.SDA.xml 5.5.4.5 Loading data If you have not done so already, clone (new) or pull (update) the PalEON Camp2016 repository Open a shell under Tools > Shell cd to go to your home directory To clone: git clone git@github.com:PalEON-Project/Camp2016.git To pull: cd Camp2016; git pull https://github.com/PalEON-Project/Camp2016.git master Open the tree-ring data assimilation workflow under Home > pecan > scripts > workflow.treering.R Run the script from the start up through the LOAD DATA section 5.5.4.6 Estimating tree-level data uncertainties One thing that is critical for data assimilation, whether it is being used to estimate parameters or state variables, is the careful consideration and treatment of the uncertainties in the data itself. For this analysis we will be using a combination of forest plot and tree ring data in order to estimate stand-level productivity. The basic idea is that we will be using the plot-sample of measured DBHs as an estimate of the size structure of the forest, and will use the annual growth increments to project that forest backward in time. Tree biomass is estimated using empirical allometric equations relating DBH to aboveground biomass. There are a number of sources of uncertainty in this approach, and before moving you are encouraged to think about and write down a few: Today we will use a statistical model based on the model developed by Clark et al 2007 that partitions out a number of sources of variability and uncertainty in tree ring and plot data (Fig 1). This model is a Bayesian statespace model that treats the true diameters (D) and increments (X) as latent variables that are connected through a fairly simple mixed effects process model \\[D_{ij,t+1} = D_{ij,t} + \\mu + \\alpha_{i} + \\alpha_t + \\epsilon_{ij,t}\\] where i = individual, j = plot, t = time (year). Each of these terms are represented at normal distributions, where \\(\\mu\\) is a fixed effect (overall mean growth rate) and individual and year are random effects \\[\\mu \\sim N(0.5,0.5)\\] \\[\\alpha_{i} \\sim N(0,\\tau_{i})\\] \\[\\alpha_{t} \\sim N(0,\\tau_{t})\\] \\[\\epsilon_{ij,t} \\sim N(0,\\tau_{e})\\] The connection between the true (latent) variable and the observations is also represented as normal with these variances representing measurement error: \\[D_{ij,t}^O \\sim N( D_{ij,t},\\tau_D)\\] \\[X_{ij,t}^O \\sim N( X_{ij,t},\\tau_r)\\] Finally, there are five gamma priors on the precisions, one for the residual process error (\\(\\tau_{e}\\)), two for the random effects on individual (\\(\\tau_{i}\\)) and time (\\(\\tau_t\\)), and two measurement errors or DBH (\\(\\tau_D\\)) and tree rings (\\(\\tau_r\\)) \\[\\tau_{e} \\sim Gamma(a_e,r_e)\\] \\[\\tau_{i} \\sim Gamma(a_i,r_i)\\] \\[\\tau_{t} \\sim Gamma(a_t,r_t)\\] \\[\\tau_{D} \\sim Gamma(a_D,r_D)\\] \\[\\tau_{r} \\sim Gamma(a_r,r_r)\\] This model is encapsulated in the PEcAn function: InventoryGrowthFusion(combined,n,iter) where the first argument is the combined data set formatted for JAGS and the second is the number of MCMC interations. The model itself is written for JAGS and is embedded in the function. Running the above InventoryGrowthFusion will run a full MCMC algorithm, so it does take a while to run. The code returns the results as an mcmc.list object, and the next line in the script saves this to the outputs directory We then call the function InventoryGrowthFusionDiagnostics to print out a set of MCMC diagnostics and example time-series for growth and DBH. 5.5.4.7 Allometric equations Aboveground NPP is estimated as the increment in annual total aboveground biomass. This estimate is imperfect, but not unreasonable for demonstration purposes. As mentioned above, we will take an allometric approach of scaling from diameter to biomass: Biomass = b0 * DBH b1 We will generate the allometric equation on a PFT level using another Bayesian model that synthesizes across a database of species-level allometric equations (Jenkins et al 2004). This model has two steps within the overall MCMC loop. First it simulates data from each equation, including both parameter and residual uncertainties, and then it updated the parameters of a single allometric relationship across all observations. The code also fits a second model, which includes a random site effect, but for simplicity we will not be using output from this version. Prior to running the model we have to first query the species codes for our pfts. Next we pass this PFT list to the model, AllomAve, which saves the results to the output directory in addition to returning a summary of the parameters and covariances. 5.5.4.8 Estimate stand-level NPP If we have no uncertainty in our data or allometric equations, we could estimate the stand aboveground biomass (AGB) for every year by summing over the biomass of all the trees in the plot and then divide by the plot area. We would then estimate NPP by the difference in AGB between years. One approach to propagating uncertainties into NPP would be to transform the distribution of DBH for each individual tree and year into a distribution for biomass, then sum over those distributions to get a distribution for AGB and then subtract the distributions to get the distributions of NPP. However, if we do this we will greatly overestimate the uncertainty in NPP because we ignore the fact that our increment data has much lower uncertainty than our diameter data. In essence, if we take a random draw from a distribution of AGB in year and it comes back above average, the AGB is much more likely to also be above average the following year than if we were to do an independent draw from that distribution. Accounting for this covariance requires a fairly simple change in our approach and takes advantage of the nature of the MCMC output. The basic idea is that we are going to take a random draw from the full individual x year diameter matrix, as well as a random draw of allometric parameters, and perform the ‘zero-error’ calculation approach described above. We will then create a distribution of all the NPP estimates that comes out of repeated draws for the full diameter matrix. This approach is encapsulated in the function plot2AGB. The argument unit.conv is a factor that combines both the area of the plot and the unit conversion from tree biomass (kg/tree) to stand AGB (Mg/ha). There are two outputs from plot2AGB: a pdf depicting estimated NPP and AGB (mean and 95% CI) time series, with each page being a plot; and plot2AGB.Rdata, a binary record of the function output that is read into the data assimilation code. The latter is also returned from the fuction and assigned to the variable “state”. Finally, we calculate the mean and standard deviation of NPP and save this as obs. 5.5.4.9 Build Initial Conditions The function sample.IC.SIPNET uses the AGB estimate from the previous function in order to initialize the data assimilation routine. Specifically it samples n.ensemble values from the first time step of the AGB estimate. Embedded in this function are also a number of prior distributions for other state variables, which are also samples in order to create a full set of initial conditions for SIPNET. 5.5.4.10 Load Priors The function sample.parameters samples values from the most recent posterior parameter distributions. You can also specify a specific set of parameters so sample from by specifying the argument <prior> within <assim.sequential> as the posterior.id you want to use. This is useful to know if you want to go back and run with the Meta-analysis posteriors, or if you end up rerunning the meta-analysis and need to go back and specify the parameter data assimilation posteriors instead of the most recent. 5.5.4.11 Ensemble Kalman Filter The function sda.enkf will run SIPNET in Ensemble Kalman Filter mode. The output of this function will be all the of run outputs, a PDF of diagnostics, and an Rdata object that includes three lists: FORECAST will be the ensemble forecasts for each year ANALYSIS will be the updated ensemble sample given the NPP observations enkf.params contains the prior and posterior mean vector and covariance matrix for each time step. If you look within this function you will find that much of the format is similar to the pda.mcmc function, but in general is much simpler. The function begins by setting parameters, opening a database connection, and generating workflow and ensemble ID’s. Next we split the SIPNET clim meteorology file up into individual annual files since we will be running SIPNET for a year at a time between updates. Next we perform an initial set of runs starting from the initial states and parameters we described above. In doing so we create the run and output directories, the README file, and the runs.txt file that is read by start_model_runs. Worth noting is that the README and runs.txt don’t need to be updated within the forecast loop. Given this initial run we then enter the forecast loop. Within this loop over years we perform four basic steps. First, we read the output from the latest runs. Second, we calculate the updated posterior state estimates based on the model ensemble prior and observation likelihood. Third, we resample the state estimates based on these posterior parameters. Finally, we start a new set of runs based on this sample. The sda.enfk function then ends by saving the outputs and generating some diagnostic figures. The first set of these shows the data, forecast, analysis. The second set shows pairs plots of the covariance structure for the Forecast and Analysis steps. The final set shows the time-series plots for the Analysis of the over state variables produced by SIPNET. 5.5.4.12 Finishing up The final bit of code in the script will register the workflow as complete in the database. After this is run you should be able to find all of the runs, and all of the outputs generated above, from within the PEcAn webpages. 5.5.5 PEcAn: Testing the Sensitivity Analysis Against Observations” 5.5.5.1 Author: “Ankur Desai” 5.5.5.2 Flux Measurements and Modeling Course, Tutorial Part 2 This tutorial assumes you have successfully completed the Demo01, Demo02 and the modelVSdata tutorial. 5.5.5.3 Introduction Now that you have successfully run PEcAn through the web interface and have learned how to do a simple comparison of flux tower observations to model output, let’s start looking at how data assimilation and parameter estimation would work with an ecosystem model. Before we start a full data assimilation exercise, let’s try something simple – single parameter selection by hand. Open http://localhost:3280/rstudio or http://localhost:6480/rstudio or the Amazon URL/rstudio if running on the cloud. In Demo02, you have ran a sensitivity analysis of SIPNET model runs at Niwot Ridge sampling across quantiles of a parameter prior, while holding all others to the median value. The pecan.xml file told PEcAn to run an sensitivity analysis, which simply meant SIPNET was run multiple times with the same driver, but varying parameter values one at a time (while holding all others to their median), and the parameter range also specified in the pecan.xml file (as quantiles, which were then sampled against the BETY database of observed variation in the parameter for species within the specific plant functional type). Let’s try to compare Ameriflux NEE to SIPNET NEE across all these runs to make a plot of parameter vs. goodness-of-fit. We’ll start with root mean square error (RMSE), but then discuss some other tests, too. 5.5.5.4 A. Read in settings object from a run xml Open up a connection to the bety database and create a settings object from your xml, just like in the modelVSdata tutorial. settings<-PEcAn.settings::read.settings("~/output/PEcAn_99000000002/pecan.CONFIGS.xml") con <- PEcan.DB::db.open(settings$database$bety) We read in the pecan.CONFIG.xml instead of the pecan.xml because the pecan.CONFIG.xml has already incorperated some of the information from the database we would have to query again if we used the pecan.xml. runid<-as.character(read.table(paste(settings$outdir, "/run/","runs.txt", sep=""))[1,1]) # Note: if you are using an xml from a run with multiple ensembles this line will provide only the first run id outdir<- paste(settings$outdir,"/out/",runid,sep= "") start.year<-as.numeric(lubridate::year(settings$run$start.date)) end.year<-as.numeric(lubridate::year(settings$run$end.date)) site.id<-settings$run$site$id Back to the files pane, within the run/ folder, find a folder called pft/ and within that a folder with the pft name (such as temprature.coniferous). Within that is a PDF file that starts sensitivity.analysis. In Rstudio, just click on the PDF to view it. You discussed this PDF last tutorial, through the web interface. Here, we see how the model NEE in SIPNET changes with each parameter. Let’s read that sensitivity output. Navigate back up (..) to the ~/output/RUNDIR/ folder. Find a series of files that end in “.RData”. These files contain the R variables used to make these plots. In particular, there is sensitivity.output..RData which contains the annual NEE as a function of each parameter quantile. Click on it to load a variable into your environment. There is sensitivity.results..RData which contains plotting functions and variance decomposition output, which we don’t need in this tutorial. And finally, there is **sensitivity.samples.*.RData** which contains the actual parameter values and the RunIDs associated with each sensitivity run. Click on sensitivity.samples..RData* to load it into your environment, or run the {r}load() script below. You should see a set of five new variables (pft.names, trait.names, sa.ensemble.id, sa.run.ids, sa.samples). Let’s extract a parameter and it’s sensitivity NEE output from the list sa.samples, which is organized by PFT, and then by parameter. First, let’s look at a list of PFTs and parameters available: load(paste(settings$outdir,"/sensitivity.samples.", settings$sensitivity.analysis$ensemble.id,".Rdata", sep="")) names(sa.samples) names(sa.samples$temperate.coniferous) Now to see the actual parameter values used by the runs, just pick a parameter and type: sa.samples$temperate.coniferous$psnTOpt Let’s store that value for future use: psnTOpt <- sa.samples$temperate.coniferous$psnTOpt Now, to see the annual NEE output from the model for a particular PFT and parameter range, try load(paste(settings$outdir,paste("/sensitivity.output", settings$sensitivity.analysis$ensemble.id,settings$sensitivity.analysis$variable,start.year,end.year,"Rdata", sep="."), sep="")) sensitivity.output$temperate.coniferous$psnTOpt You could even plot the two: plot(psnTOpt,sensitivity.output$temperate.coniferous$psnTOpt) What do you notice? Let’s try to read the output from a single run id as you did in the earlier tutorial. runids <- sa.run.ids$temperate.coniferous$psnTOpt arun <- PEcAn.utils::read.output(runids[1], paste(settings$outdir, "out", runids[1], sep="/"), start.year= start.year, end.year= end.year,"NEE", dataframe = TRUE) plot(arun$posix,arun$NEE) 5.5.5.5 B. Now let’s bring in the actual observations Recall reading Ameriflux NEE in the modelVSdata tutorial. File_path<-"~/output/dbfiles/AmerifluxLBL_site_0-772/AMF_US-NR1_BASE_HH_9-1.csv" File_format<-PEcAn.DB::query.format.vars(bety = con, format.id = 5000000002) #This matches the file with a premade "format" or a template that describes how the information in the file is organized site<-PEcAn.DB::query.site(site.id = site.id, con) obs<-PEcAn.benchmark::load_data(data.path = File_path, format= File_format, time.row = File_format$time.row, site = site, start_year = start.year, end_year = end.year) obs$NEE[obs$UST<0.2]<-NA #Apply a U* filter plottable<-align_data(model.calc = arun, obvs.calc = obs, align_method = "match_timestep", var= "NEE") head(plottable) 5.5.5.6 C. Finally, we can finally compare model to data In the modelVSdata, you also compared NEE to the ensemble model run. Here we will do the same except we include each sensitivity run. plot(plottable$NEE.m,plottable$NEE.o) abline(0,1,col="red") And remember the formula for RMSE: sqrt(mean((plottable$NEE.o-plottable$NEE.m)^2,na.rm = TRUE)) All we need to do to go beyond this is to make a loop that reads in each sensitivity run NEE based on runids, calculates RMSE against the observations, and stores it in an array, by combining the steps above in a for loop. Make sure you change the directory names and year to your specific run. rmses <- rep(0,length(runids)) for(r in 1:length(runids)){ arun <- read.output(runids[r],paste(settings$outdir, "out", runids[r], sep="/"),2004,2004,"NEE", dataframe= TRUE) plottable<-align_data(model.calc = arun, obvs.calc = obs, align_method = "match_timestep", var= "NEE") rmses[r] <- sqrt(mean((plottable$NEE.o-plottable$NEE.m)^2,na.rm = TRUE)) } rmses Let’s plot that array plot(psnTOpt,rmses) Can you identify a minimum (if there is one)? If so, is there any reason to believe this is the “best” parameter? Why or why not? Think about all the other parameters. Now that you have the hang of it, here are a few more things to try: Try a different error functions, given actual NEE uncertainty. You learned earlier that uncertainty in half-hourly observed NEE is not Gaussian. This makes RMSE not the correct measure for goodness-of-fit. Go to ~/pecan/modules/uncertainty/R, open flux_uncertainty.R, and click on the source button in the program editing pane. Alternatively, you can source the function from the console using: source("pecan/modules/uncertainty/R/flux_uncertainty.R") Then you can run: unc <- flux.uncertainty(plottable$NEE.o,QC=rep(0,17520)) plot_flux_uncertainty(unc) The figure shows you uncertainty (err) as a function of NEE magnitude (mag). How might you use this information to change the RMSE calculation? Try a few other parameters. Repeat the above steps but with a different parameter. You might want to select one from the sensitivity PDF that has a large sensitivity or from the variance decomposition that is also poorly constrained. "],["advanced-user.html", "5.6 Advanced User Guide", " 5.6 Advanced User Guide Workflow curl submission 5.6.1 Submitting Workflow from Command Line This is how you can submit a workflow from the command line through the pecan web interface. This will use curl to submit all the requireed parameters to the web interface and trigger a run. # the host where the model should run # never use remote sites since you will need to pass your username/password and that WILL be stored hostname=pecan.vm # the site id where to run the model (NIWOT in this case) siteid=772 # start date and end date, / need to be replaced with %2F or use - (NOT TESTED) start=2004-01-01 end=2004-12-31 # if of model you want to run, rest of section parameters depend on the model selected (SIPNET 136) modelid=5000000002 # PFT selected (we should just use a number here) # NOTE: the square brackets are needed and will need be escaped with a \\ if you call this from command line pft[]=temperate.coniferous # initial pool condition (-1 means nothing is selected) input_poolinitcond=-1 # met data input_met=99000000006 # variables to collect variables=NPP,GPP # ensemble size runs=10 # use sensitivity analysis sensitivity=-1,1 # redirect to the edit pecan.xml file pecan_edit=on # redirect to edit the model configuration files model_edit=on # use browndog browndog=on For example the following will run the above workflow. Using -v in curl will show verbose output (needed) and the grep will make sure it only shows the redirect. This will show the actual workflowid: curl -s -v 'http://localhost:6480/pecan/04-runpecan.php?hostname=pecan.vm&siteid=772&start=2004-01-01&end=2004-12-31&modelid=5000000002&pft\\[\\]=temperate.coniferous&input_poolinitcond=-1&input_met=99000000006' 2>&1 | grep 'Location:' < Location: 05-running.php?workflowid=99000000004 In this case you can use the browser to see progress, or use the following to see the status: curl -s 'http://localhost:6480/pecan/dataset.php?workflowid=99000000004&type=file&name=STATUS' TRAIT 2017-12-13 08:56:56 2017-12-13 08:56:57 DONE META 2017-12-13 08:56:57 2017-12-13 08:57:13 DONE CONFIG 2017-12-13 08:57:13 2017-12-13 08:57:14 DONE MODEL 2017-12-13 08:57:14 2017-12-13 08:57:15 DONE OUTPUT 2017-12-13 08:57:15 2017-12-13 08:57:15 DONE ENSEMBLE 2017-12-13 08:57:15 2017-12-13 08:57:16 DONE FINISHED 2017-12-13 08:57:16 2017-12-13 08:57:16 DONE Or to show the output log: curl -s 'http://localhost:6480/pecan/dataset.php?workflowid=99000000004&type=file&name=workflow.Rout' R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" Copyright (C) 2017 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. .... "],["basic-web-workflow.html", "6 Basic Web workflow", " 6 Basic Web workflow This chapter describes the major steps of the PEcAn web-based workflow, which are as follows: Model and site selection Model configuration Run execution – TODO! Results – TODO! Interactive visualizations – TODO! We recommend that all new users begin with [PEcAn Hands-On Demo 01: Basic Run]. The documentation below assumes you are already familiar with how to navigate to PEcAn’s interactive web interface for running models. "],["web-site-model.html", "6.1 Site and model selection", " 6.1 Site and model selection This page is used to select the model to run and the site at which you would like to run that model. NOTE: If this page does not load for you, it may be related to a known Google Maps API key issue. See issue #1269 for a possible solution. 6.1.1 Selecting a model On the Select Host webpage use the Host pull-down menu to select the server you want to run on. PEcAn is designed to allow models to be run both locally and on remote high-performance computing (HPC) resources (i.e. clusters). We recommend that users start with local runs. More information about connecting your PEcAn instance to a cluster can be found on the Remote execution with PEcAn page. Next, select the model you want to run under the Model pull-down menu. The list of models currently supported by PEcAn, along with information about these models, is available on the PEcAn Models page. If a PEcAn-supported model is not listed, this is most likely because the model has not been installed on the server. The PEcAn team does not have permissions to redistribute all of the models that are coupled to it, so you will have to install some PEcAn-compatible models yourself. Please consult the PEcAn model listing for information about obtaining and installing different models. Once the model is installed and you have added the location of the model executable to Bety (see Adding An Ecosystem Model), your model should appear on the PEcAn Select Host page after your refresh the page. If you would like to add a new model to PEcAn please consult our guide for Adding an Ecosystem Model and contact the PEcAn team for assistance. If selecting your model causes your site to disappear from the Google Map, that means the site exists but there are no drivers for that model-site combination registered in the database. Click the “Conversion” checkbox. If your site reappears, that means PEcAn should be able to automatically generate the required inputs for this site by converting from existing input files in other formats. If the site still does not reappear, that means there are required input files for that model-site combination that PEcAn cannot autogenerate. This may be because the model has unique input requirements or because it has not yet been fully coupled to the PEcAn input processing workflow. Go to the troubleshooting section under Selecting a site for more information on diagnosing what drivers are missing. 6.1.2 Selecting a site 6.1.3 Site Groups PEcAn provides the option of organizing sites into groups to make them easier to find and easier to run as a group. We have pre-loaded a number of common research networks (e.g., FLUXNET, LTER, NEON), but you are free to create new site groups through Bety. If you are searching for a site that is not part of an existing site group, or you are unsure which site group it belongs to, select “All Sites” to see all sites in Bety. Note that this may take a while to render. 6.1.4 Using existing sites Find the site on the map The simplest way of determining if a site exists in PEcAn is through the Google Map interface of the web-based workflow. You’ll want to make sure that the “Site Group” is set to “All Sites” and the “Model” is set to “All Models”. Find the site in BETY If the site is not on the map, it may still be in Bety but with insufficient geographic information. To locate the site in Bety, first login to your local version of the BETY database. If using the VM, navigate to localhost:6480/bety and login with username bety and password illinois. Then, navigate to Data > Sites and use the “Search” box to search for your site. If you do find your site, click “Edit” and add geographic information so that the site will show up on the map. Also, note that the site ID number shows up in the URL for the “Show” or “Edit” pages. This ID is often useful to know, for example when editing a PEcAn settings file by hand. If you did not find you site, follow the instructions below to add a site. 6.1.5 Adding a new site (TODO: Move most of this out) Log into Bety as described above. Pick a citation for your site Each site requires an associated “citation” that must be added before the site itself is added. First, navigate to “Data > Citations” and use the “Search” box to see if the relevant citation already exists. If it does, click the check mark under “Actions” to proceed to site creation. To create a new citation, click the New Citation button, fill in the fields, and then click “Create”. The “field URL” should contain the web address that takes you to this publication on the publisher’s website. The “PDF” field should be the full web address to a PDF for this citation. Note that our definition of a citation is flexible, and a citation need not be a peer-reviewed publication. Most of the fields in “New Citation” can be left blank, but we recommend at least adding a descriptive title, such as “EBI Farm Field Data” and a relevant contact person as the “Author”. Once the Citation is created or selected this should automatically take you to the Sites page and list any Sites already associated with this citation. To create a new site click the New Site button. When creating a new site, the most important fields are the Site name and coordinates (latitude and longitude). The coordinates can be entered by hand or by clicking on the site location on the Google Map interface. All other information is optional, but can be useful for searching and indexing purposes. When you are done click Create. At this point, once the PEcAn site-level page is refreshed, the site should automatically appear. 6.1.6 Troubleshooting 6.1.6.1 My site shows up when I don’t have any model selected, but disappears once I select the model I want to run Selecting a model will cause PEcAn to filter the available sites based on whether they possess the required Inputs for a given model (e.g. meteorology). To check what Inputs are missing for a site point your browser to the pecan/checksite.php webpage (e.g. localhost:6480/pecan/checksite.php). This page looks virtually identical to the site selection page, except that it has a Check button instead of Prev and Next. If you select a Machine, Model, and Site and then click Check the page should return a list of what Inputs are missing (listing both the name and the Format ID number). Don’t forget that its possible for PEcAn to have required Inputs in its database, but just not have them for the Machine where you want to run. To see more about what Inputs a given model can accept, and which of those are required, take a look at the MODEL_TYPE table entry in the database (e.g. go to localhost:6480/bety; Select Runs > Model Type; and then click on the model you want to run). For information about loading missing Inputs into the database visit Input records in BETY, and also read the rest of the pages under this section, which will provide important information about the specific classes of Inputs (e.g. meteorology, vegetation, etc). Finally, we are continually developing and refining workflows and standards for processing Input data in a model-agnostic way. The details about what Inputs can be processed automatically are discussed input-by-input in the sections below. For those looking to dive into the code or troubleshoot further, these conversions are ultimately handled under the PEcAn.workflow::do_conversions workflow module. "],["web-model-config.html", "6.2 Model configuration", " 6.2 Model configuration This page is used for basic model configuration, including when your model will run and what input data it will use. 6.2.1 Choosing meteorology Once a Machine, Model, and Site have been selected, PEcAn will take you to the Input selection page. From this page you will select what Plant Functional Type (PFT) you want to run at a site, the start and end dates of the run, and various Input selections. The most common of these across all models is the need to specify meteorological forcing data. The exact name of the menu item for meteorology will vary by model because all of the Input requirements are generated individually for each model based on the MODEL_TYPE table. In general there are 3 possible cases for meteorology PEcAn already has driver files in its database PEcAn does not have drivers, but can generate them from publicly available data You need (or want) to upload your own drivers The first two cases will appear automatically in the the pull down menu. For meteorological files that already exist you will see the date range that’s available. By contrast, met that can be generated will appear as “Use ”, where is the origin of the data (e.g. “Use Ameriflux” will use the micromet from an Ameriflux eddy covariance tower, if one is present at the site). If you want to upload your own met data this can be done in three ways. The default way to add met data is to incorporate it into the overall meteorological processing workflow. This is preferred if you are working with a common meteorological data product that is not yet in PEcAn’s workflow. This case can be divided into two special cases: Data is in a common MIME-type that PEcAn already has a converter for (e.g. CSV). In this case you’ll want to create a new Format record for the meta-data so that the existing converter can process this data. See documentation for [Creating a new Format record in BETY] for more details. Data is in a more complicated format or interactive database, but large/useful enough to warrent a custom conversion function. Details on creating custom met conversions is in the [Input Conversion], though at this stage you would also be strongly encouraged to contact the PEcAn development team. The second-best way is to upload data in PEcAn’s standard meteorological format (netCDF files, CF metadata). See [Input Conversion] for details about variables and units. From this standard, PEcAn can then convert the file to the model-specific format required by the model you have chosen. This approach is preferred for a rare or one-off meterological file format, because PEcAn will also be able to convert the file into the format required by any other model as well. The last option for adding met data is to add it in a model-specific format, which is often easiest if you’ve already been running your model at a site and are just switching to using PEcAn. 6.2.2 Met workflow In a nutshell, the PEcAn met workflow is designed to reduce the problem of converting n possible met inputs into m possible model formats, which requires n x m conversion functions as well as numerous custom functions for downscaling, gap filling, etc. Instead, PEcAn works with a single met standard, and thus requires n conversion functions, one for converting each data source into the PEcAn standard, and then m conversion functions for converting from that standard to what an individual model requires. For a new model joining the PEcAn system the burden in particularly low – writing one conversion function provides access to n inputs. Similarly, PEcAn performs all other operations/manipulations (extracting a site, downscaling, gap filling, etc) within the PEcAn standard, which means these operations only need be implemented once. Consider a generic met data product named MET for simplicity. PEcAn will use a function, download.MET, to pull data for the selected year from a public data source (e.g. Ameriflux, North American Regional Reanalysis, etc). Next, PEcAn will use a function, met2CF.MET, to convert the data into the PEcAn standard. If the data is already at the site scale it will then gapfill the data. If the data is a regional or global data product, PEcAn will then permute the data to allow easier site-level extraction, then it will extract data for the requested site and data range. Modules to address the temporal and spatial downscaling of meteorological data products, as well as their uncertainties, are in development but not yet part of the operational workflow. All of these functions are located within the data.atmosphere module. Once data is in the standard format and processed, it will be converted to the model-specific format using a met2model.MODEL function (located in that MODEL’s module). More detailed information on how PEcAn processes inputs can be found on our [Input Conversion] page. 6.2.3 Troubleshooting meteorological conversions At the current moment, most of the issues below address possible errors that the Ameriflux meteorology workflow might report 6.2.3.1 Could not do gapfill … The following variables have NA’s This error message means that there were gaps in the downloaded data, for whatever variables that were listed, which were larger than the current algorithm could fill. Particularly common is missing radiation or PAR data, as Ameriflux frequently converts nighttime data to NULL, and work is in progress to detect this based on solar geometry. Also common are incomplete years (first or last year of tower operations). 6.2.3.2 Could not get information about . Is this an Ameriflux site? This message occurs when PEcAn believes that a site is part of Ameriflux (because it was listed on the Ameriflux or FLUXNET webpage and has a US-* site code), but no data is present on the Ameriflux server. The most common reasons for this is that you have selected a site that has not submitted data to Ameriflux yet (or that data hasn’t been processed yet), or you have selected a year that’s outside the tower’s operational period. Visit Ameriflux and FLUXNET for lists of available site years. 6.2.3.3 Could not download data for for the year This is similar to the previous error, but in this case PEcAn did find data for the site listed, but just not for the year requested. This can usually be fixed by just altering the years of the run to match those with available data. 6.2.3.4 I could not find the requested var (or dimvar) in the file! PEcAn could not find a required variable within the downloaded file. Most likely this is due to that variable not being measured at this site. The most common cause of failure is the absence of atmospheric pressure data (PRESS), but since most models have a low sensitivity to this variable we are working on methods to estimate this from other sources. "],["selecting-plant-functional-types-pfts-and-other-parameter-groupings..html", "6.3 Selecting Plant Functional Types (PFTs) and other parameter groupings.", " 6.3 Selecting Plant Functional Types (PFTs) and other parameter groupings. 6.3.1 Using existing PFTs PEcAn does not automatically know what vegetation types are present at your study site so you need to select the PFT. Some models, such as ED2 and LINKAGES, support competition among multiple PFTs and thus you are encouraged to highlight multiple choices. Other models, such as SIPNET and DALEC, only support one PFT at a site. Many models also have parameters that control non-vegetation processes (e.g. soil biogeochemistry and hydrology). PEcAn allows users to assign these parameters to functional groups as well (e.g. a soils PFT) 6.3.2 Creating new PFTs To modify or add a new Plant Functional Type (PFT), or to change a PFT’s priors, navigate on the grey menu bar to Data > PFTs To add a new pft, click “new PFT” at the top and enter a name and description. (hint: we’re trying to name PFTs based on model.biome.pft, ED2 is the default model if one isn’t specified) To add new species to a PFT click on [+] View Related Species and type the species, genus, or family you are looking for into the Search box. Click on the + to add. To remove a species from a PFT, click on [+] View Related Species and click on the X of the species you want to remove from the PFT. To remove a prior, click [-] View Related Prior and click on the X of the variable who’s prior you want to remove. This will cause the parameter to be excluded from all analyses (meta-analysis, sensitivity analysis, etc) and revert to its default value. To add a prior, choose one from the white box of priors on the right to choose. To view the specification of a prior, or to add a new prior, click BETY-DB > Priors and enter the information on the variable, distribution name, distribution parameters, etc. N is the sample size underlying the prior specification (0 is ok for uninformative priors). You can also got to Data > Variables in order to use the search function to find an existing variable (or create a new one). Please try not to create new variables unnecessarily (e.g. changes of variable name or units to what your model uses is handled internally, so you want to find the trait with the correct MEANING). Additional information on adding PFTs, Species, and Priors can be found in Adding An [Ecosystem Model]. 6.3.3 Choosing initial vegetation On the Input Selection webpage, in addition to selecting PFTs, start & end dates, and meteorology, many models also require some way of specifying the initial conditions for the vegetation, which may range from setting the aboveground biomass and LAI up to detailed inventory-like data on species composition and stand structure. At the moment, PEcAn has three cases for initial conditions: If files already exist in the database, they can simply be selected from the menu. For ED2, there are 3 different veg files (site, pss, css) and it is important that you select a complete set, not mix and match. If files don’t exist they can be uploaded following the instructions in Create a database file record for the input data. Automated vegetation initial condition workflow As with meteorology, PEcAn is working to develop a model-agnostic workflow for converting various sources of vegetation data to common standards, developing common processing tools, and then writing out to model-specific formats. This process is in a much early stage than the meteorology workflow, as we are still researching what the options are for standard formats, but ultimately aims to be much more broad in scope, considering not just plot inventory data but also historical documentation, paleoecological proxies, satellite remote sensing (e.g. LANDSAT), airborne hyperspectral imagery, and active remote sensing (Lidar, Radar). At the moment, what is functional is a prototype workflow that works for inventory-based vegetation data. This data can come from either files that have been registered with the BETY Inputs and Formats tables or can be queried from the USFS Forest Inventory and Analysis (FIA). For more information visit Section 13.1.2.2 Vegetation Data 6.3.4 US FIA This tool works with an internal copy of the FIA that is uploaded to a postGRES database along side BETY, however for space reasons this database does not ship with the PEcAn VM. To turn this feature on: Download and Install the FIA database. Instructions in Installing data for PEcAn For web-base runs, specify the database settings in the config.php For R-based runs, specify the database settings in the THE PEcAn XML More detailed information on how PEcAn processes inputs can be found on our [Input Conversion]page. 6.3.5 Spin up A number of ecosystem models are typically initialized by spinning up to steady state. At the moment PEcAn doesn’t handle spin up automatically (e.g. looping met, checking for stability), but there are various ways to achieve a spin-up within the system. Option 1: If there are model-specific settings in a model’s settings/config file, then that file can be accessed by clicking on the Edit model config check box. If this box is selected then PEcAn will pause the site run workflow after it has generated your model config file, but before it runs the model, and give you an opportunity to edit the file by hand, allowing you to change any model-specific spin up settings (e.g met recycling, spin up length) Option 2: Set start_year very early and set the met drivers to be a long time series (e.g. PalEON, something custom uploaded to Inputs) Option 3: In the MODEL_TYPE table, add your model’s restart format as an optional input, modify the model specific write.config function to use that restart, and then load a previous spin-up to the Inputs table Beyond these options, we hope to eventually develop more general, model-agnostic tools for spin up. In particular, we have started to explore the accelerated spin-up and semi-analytical techniques being developed by Yiqi Luo’s lab 6.3.6 Selecting a soils product Many models have requirements for soils information, which may include: site-specific soil texture and depth information; soil biogeochemical initial conditions (e.g. soil carbon and nitrogen pools); soil moisture initial conditions; and soil thermal initial conditions. As with Choosing initial vegetation, we eventually hope to develop data standards, soils workflows, and spin-up tools, but at the moment this workflow is in the early stages of development. Model requirements need to be met byCreating a new Input record in BETY into the database or using files that have already been uploaded. Similar to met, we recommend that this file be in the PEcAn-standard netCDF described below, but model-specific files can also be registered. 6.3.7 Soil texture, depth, and physical parameters A PEcAn-standard netCDF file format exists for soil texture, depth, and physical parameters, using PEcAn standard names that are largely a direct extention of the CF standard. The easiest way to create this file is with the PEcAn R function soil2netcdf as described in the Soil Data section of the Advanced Users Guide. A table of standard names and units can be listed using PEcAn.data.land::soil.units() with no arguments. More detailed information on how PEcAn processes inputs can be found on our [Input Conversion] page. 6.3.8 Other model inputs Finally, any other model-specific inputs (e.g. N deposition, land use history, etc), should be met by Creating a new Input record in BETY or using files that have already been uploaded. "],["intermediate-user.html", "7 More on the PEcAn Web Interface", " 7 More on the PEcAn Web Interface This section will provide information to those wanting to take advantage of PEcAn’s customizations from the web interface. Additional web configuration - Advanced options available from the web interface Brown Dog Sensitivity and ensemble analyses[TODO: Under construction…] [Editing model configurations][TODO: Under construction…] Settings-configured analyses - Analyses only available by manually editing pecan.xml Parameter data assimilation (PDA) State data assimilation (SDA) Remote execution with PEcAn - Running analyses and generally working with external machines (HPC) in the context of PEcAn. "],["additional-web-configuration.html", "7.1 Additional web configuration", " 7.1 Additional web configuration Additional settings for web configuration: Web interface setup Brown Dog Advanced setup [Sensitivity analysis] (TODO) [Uncertainty analysis] (TODO) Editing model configuration files 7.1.1 Web interface setup There are few options which you can change via web interface. To visit the configuration page either you can just click on the setups link on the introduction page alternatively can type <host>/setups/. The list of configuration available Database configuration : BETYdb(Biofuel Ecophysiological Traits and Yields database) configuration details, can be edited according to need. Browndog configuration : Browndog configuration details, Used to connect browndog. Its included by default in VM. FIA Database : FIA(Forest Inventory and Analysis) Database configuration details, Can be used to add additional data to models. Google MapKey : Google Map key, used to access the google map by PEcAn. Change Password : A small infomation to change the VM user password. (if using Docker image it won’t work) Automatic Sync : If ON then it will sync the database between local machine and the remote servers. Still unders testing part might be buggy. Still work on the adding other editing feature going on, this page will be updated as new configuration will be available. 7.1.2 Brown Dog The Browndog service provides PEcAn with access to large and diverse sets of data at the click of a button in the format that PEcAn needs. By clicking the checkbox you will be using the Browndog Service to process data. For more information regarding meteorological data check out Available Meteorological Drivers. More information can be found at the Browndog website. 7.1.3 Advanced Setup (TODO: Under construction…) 7.1.4 Editing model configurations (TODO: Under construction…) "],["settings-configured-analyses.html", "7.2 Settings-configured analyses", " 7.2 Settings-configured analyses These analyses can be run through the web interface, but lack graphical interfaces and currently can only be configured throughthe XML settings. To run these analyses use the Edit pecan.xml checkbox on the Input configuration page. Eventually, these modules will be integrated into the web user interface. Parameter Data Assimilation (PDA) State Data Assimilation (SDA) MultiSettings Benchmarking (TODO: Add links) 7.2.1 Parameter data assimilation (PDA) All functions pertaining to Parameter Data Assimilation are housed within: pecan/modules/assim.batch. For a detailed usage of the module, please see the vignette under pecan/modules/assim.batch/vignettes. Hierarchical version of the PDA is also implemented, for more details, see the MultiSitePDAVignette package vignette and function-level documentation. 7.2.1.1 pda.mcmc.R This is the main PDA code. It performs Bayesian MCMC on model parameters by proposing parameter values, running the model, calculating a likelihood (between model output and supplied observations), and accepting or rejecting the proposed parameters (Metropolis algorithm). Additional notes: The first argument is settings, followed by others that all default to NULL.settings is a list used throughout Pecan, which contains all the user options for whatever analyses are being done. The easiest thing to do is just pass that whole object all around the Pecan code and let different functions access whichever settings they need. That’s what a lot of the rest of the Pecan code does. But the flexibility to override most of the relevant settings in settings is there by providing them directly as arguments to the function. The if(FALSE)… : If you’re trying to step through the function you probably will have the settings object around, but those other variables will be undefined. If you set them all to NULL then they’ll be ignored without causing errors. It is there for debugging purposes. The next step calls pda.settings(), which is in the file pda.utils.R (see below). It checks whether any settings are being overridden by arguments, and in most cases supplies default values if it can’t find either. In the MCMC setup section The code is set up to allow you to start a new MCMC chain, or to continue a previous chain as specified in settings. The code writes a simple text file of parameter samples at every iteration, which lets you get some results and even re-start an MCMC that fails for some reason. The code has adaptive jump distributions. So you can see some initialization of the jump distributions and associated variables here. Finally, note that after all this setup a new XML settings file is saved. The idea is that the original pecan.xml you create is preserved for provenance, and then periodically throughout the workflow the settings (likely containing new information) are re-saved with descriptive filenames. MCMC loop Periodically adjust jump distribution to make acceptance rate closer to target Propose new parameters one at a time. For each: First, note that Pecan may be handling many more parameters than are actually being targeted by PDA. Pecan puts priors on any variables it has information for (in the BETY database), and then these get passed around throughout the analysis and every step (meta-, sensitivity, ensemble analyses, etc.). But for PDA, you specify a separate list of probably far fewer parameters to constrain with data. These are the ones that get looped over and varied here. The distinction between all parameters and only those dealt with in PDA is dealt with in the setup code above. First a new value is proposed for the parameter of interest. Then, a new model run is set up, identical to the previous except with the new proposed value for the one parameter being updated on this run. The model run is started, and outputs collected after waiting for it to finish. A new likelihood is calculated based on the model outputs and the observed dataset provided. Standard Metropolis acceptance criteria is used to decide whether to keep the proposed parameter. Periodically (at interval specified in settings), a diagnostic figure is saved to disk so you can check on progress. This works only for NEE currently 7.2.1.2 pda.mcmc.bs.R This file is basically identical to pda.mcm.R, but rather than propose parameters one at a time, it proposes new values for all parameters at once (“bs” stands for “block sampling”). You choose which option to use by specifying settings\\(assim.batch\\)method: * “bruteforce” means sample parameters one at a time * “bruteforce.bs” means use this version, sampling all parameters at once * “emulator” means use the emulated-likelihood version 7.2.1.3 pda.emulator This version of the PDA code again looks quite similar to the basic “bruteforce” one, but its mechanics are very different. The basic idea is, rather than running thousands of model iterations to explore parameter space via MCMC, run a relatively smaller number of runs that have been carefully chosen to give good coverage of parameter space. Then, basically interpolate the likelihood calculated for each of those runs (actually, fit a Gaussian process to it), to get a surface that “emulates” the true likelihood. Now, perform regular MCMC (just like the “bruteforce” approach), except instead of actually running the model on every iteration to get a likelihood, just get an approximation from the likelihood emulator. Since the latter step takes virtually no time, you can run as long of an MCMC as you need at little computational cost, once you have done the initial model runs to create the likelihood emulator. 7.2.1.4 pda.mcmc.recover.R This function is for recovering a failed PDA MCMC run. 7.2.1.5 pda.utils.R This file contains most of the individual functions used by the main PDA functions (pda.mcmc.*.R). assim.batch is the main function Pecan calls to do PDA. It checks which method is requested (bruteforce, bruteforce.bs, or emulator) and call the appropriate function described above. pda.setting handles settings. If a setting isn’t found, the code can usually supply a reasonable default. pda.load.priors is fairly self explanatory, except that it handles a lot of cases and gives different options priority over others. Basically, the priors to use for PDA parameters can come from either a Pecan prior.distns or post.distns object (the latter would be, e.g., the posteriors of a meta-analysis or previous PDA), or specified either by file path or BETY ID. If not told otherwise, the code tries to just find the most recent posterior in BETY, and use that as prior for PDA. pda.create.ensemble gets an ensemble ID for the PDA. All model runs associated with an individual PDA (any of the three methods) are considered part of a single ensemble. This function does is register a new ensemble in BETY, and return the ID that BETY gives it. pda.define.prior.fn creates R functions for all of the priors the PDA will use. pda.init.params sets up the parameter matrix for the run, which has one row per iteration, and one column per parameter. Columns include all Pecan parameters, not just the (probably small) subset that are being updated by PDA. This is for compatibility with other Pecan components. If starting a fresh run, the returned matrix is just a big empty matrix to fill in as the PDA runs. If continuing an existing MCMC, then it will be the previous params matrix, with a bunch of blank rows added on for filling in during this round of PDA. pda.init.run This is basically a big wrapper for Pecan’s write.config function (actually functions [plural], since every model in Pecan has its own version). For the bruteforce and bruteforce.bs methods this will be run once per iteration, whereas the emulator method knows about all its runs ahead of time and this will be a big batch of all runs at once. pda.adjust.jumps tweaks the jump distributions for the standard MCMC method, and pda.adjust.jumps.bs does the same for the block-sampled version. pda.calc.llik calculates the log-likelihood of the model given all datasets provided to compare it to. pda.generate.knots is for the emulator version of PDA. It uses a Latin hypercube design to sample a specified number of locations in parameter space. These locations are where the model will actually be run, and then the GP interpolates the likelihood surface in between. pda.plot.params provides basic MCMC diagnostics (trace and density) for parameters being sampled. pda.postprocess prepares the posteriors of the PDA, stores them to files and the database, and performs some other cleanup functions. pda.load.data.r This is the function that loads in data that will be used to constrain the PDA. It’s supposed to be eventually more integrated with Pecan, which will know how to load all kinds of data from all kinds of sources. For now, it can do NEE from Ameriflux. pda.define.llik.r A simple helper function that defines likelihood functions for different datasets. Probably in the future this should be queried from the database or something. For now, it is extremely limited. The original test case of NEE assimilation uses a heteroskedastic Laplacian distribution. pda.get.model.output.R Another function that will eventually grow to handle many more cases, or perhaps be replaced by a better system altogether. For now though, it again just handles Ameriflux NEE. 7.2.1.6 get.da.data.*.R, plot.da.R Old codes written by Carl Davidson. Defunct now, but may contain good ideas so currently left in. 7.2.2 State data assimilation (SDA) sda.enkf.R is housed within: /pecan/modules/assim.sequential/R The tree ring tutorial is housed within: /pecan/documentation/tutorials/StateAssimilation More descriptive SDA methods can be found at: /pecan/book_source/adve_user_guide_web/SDA_Methods.Rmd 7.2.2.1 sda.enkf.R Description This is the main ensemble Kalman filter and generalized filter code. Originally, this was just ensemble Kalman filter code. Mike Dietze and Ann Raiho added a generalized ensemble filter to avoid filter divergence. The output of this function will be all the of run outputs, a PDF of diagnostics, and an Rdata object that includes three lists: FORECAST will be the ensemble forecasts for each year ANALYSIS will be the updated ensemble sample given the NPP observations enkf.params contains the prior and posterior mean vector and covariance matrix for each time step. 7.2.2.2 sda.enkf.R Arguments settings - (required) State Data Assimilation Tags Example settings object obs.mean - (required) a list of observation means named with dates in YYYY/MM/DD format obs.cov - (required) a list of observation covariances names with dates in YYYY/MM/DD format IC - (optional) initial condition matrix (dimensions: ensemble memeber # by state variables). Default is NULL. Q - (optional) process covariance matrix (dimensions: state variable by state variables). Default is NULL. 7.2.2.3 State Data Assimilation Workflow Before running sda.enkf, these tasks must be completed (in no particular order), Read in a State Data Assimilation Tags Example settings file with tags listed below. i.e. read.settings(‘pecan.SDA.xml’) Load data means (obs.mean) and covariances (obs.cov) as lists with PEcAn naming and unit conventions. Each observation must have a date in YYYY/MM/DD format (optional time) associated with it. If there are missing data, the date must still be represented in the list with an NA as the list object. Create initial conditions matrix (IC) that is state variables columns by ensemble members rows in dimension. sample.IC.MODEL can be used to create the IC matrix, but it is not required. This IC matrix is fed into write.configs for the initial model runs. The main parts of the SDA function are: Setting up for initial runs: Set parameters Load initial run inputs via split.inputs.MODEL Open database connection Get new workflow ids Create ensemble ids Performing the initial set of runs Set up for data assimilation Loop over time read.restart.MODEL - read model restart files corresponding to start.time and stop.time that you want to assimilate data into Analysis - There are four choices based on if process variance is TRUE or FALSE and if there is data or not. See explaination below. write.restart.MODEL - This function has two jobs. First, to insert adjusted state back into model restart file. Second, to update start.time, stop.time, and job.sh. run model Save outputs Create diagnostics 7.2.2.4 State Data Assimilation Tags Example <state.data.assimilation> <adjustment>TRUE</adjustment> <process.variance>FALSE</process.variance> <sample.parameters>FALSE</sample.parameters> <q.type>Single</q.type> <state.variables> <variable> <variable.name>AGB.pft</variable.name> <unit>MgC/ha/yr</unit> <min_value>0</min_value> <max_value>100000000</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>KgC/m^2</unit> <min_value>0</min_value> <max_value>100000000</max_value> </variable> </state.variables> <spin.up> <start.date>1950/01/01</start.date> <end.date>1960/12/31</end.date> </spin.up> <forecast.time.step>1</forecast.time.step> <start.date>1961/01/01</start.date> <end.date>2010/12/31</end.date> </state.data.assimilation> 7.2.2.5 State Data Assimilation Tags Descriptions adjustment : [optional] TRUE/FLASE flag for if ensembles needs to be adjusted based on weights estimated given their likelihood during analysis step. The Default is TRUE for this flag. process.variance : [optional] TRUE/FLASE flag for if process variance should be estimated (TRUE) or not (FALSE). If TRUE, a generalized ensemble filter will be used. If FALSE, an ensemble Kalman filter will be used. Default is FALSE. If you use the TRUE argument you can set three more optional tags to control the MCMCs built for the generalized esnsemble filter. nitrGEF : [optional] numeric defining the length of the MCMC chains. nthin : [optional] numeric defining thining length for the MCMC chains. nburnin : [optional] numeric defining the number of burnins during the MCMCs. q.type : [optional] If the process.variance is set to TRUE then this can take values of Single, Site or PFT. censored.data : [optional] logical set TRUE for censored state variables. sample.parameters : [optional] TRUE/FLASE flag for if parameters should be sampled for each ensemble member or not. This allows for more spread in the initial conditions of the forecast. state.variable : [required] State variable that is to be assimilated (in PEcAn standard format). spin.up : [required] start.date and end.date for initial model runs. NOTE: start.date and end.date are distinct from values set in the run tag because initial runs can be done over a subset of the full run. forecast.time.step : [optional] In the future, this will be used to allow the forecast time step to vary from the data time step. start.date : [optional] start date of the state data assimilation (in YYYY/MM/DD format) end.date : [optional] end date of the state data assimilation (in YYYY/MM/DD format) NOTE: start.date and end.date are distinct from values set in the run tag because this analysis can be done over a subset of the run. 7.2.2.6 Model Specific Functions for SDA Workflow 7.2.2.7 read.restart.MODEL.R The purpose of read.restart is to read model restart files and return a matrix that is site rows by state variable columns. The state variables must be in PEcAn names and units. The arguments are: outdir - output directory runid - ensemble member run ID stop.time - used to determine which restart file to read (in POSIX format) settings - pecan.SDA.xml settings object var.names - vector with state variable names with PEcAn standard naming. Example: c(‘AGB.pft’, ‘TotSoilCarb’) params - parameters used by ensemble member (same format as write.configs) 7.2.2.8 write.restart.MODEL.R This model specific function takes in new state and new parameter matrices from sda.enkf.R after the analysis step and translates new variables back to the model variables. Then, updates start.time, stop.time, and job.sh so that start_model_runs() does the correct runs with the new states. In write.restart.LINKAGES and write.restart.SIPNET, job.sh is updated by using write.configs.MODEL. outdir - output directory runid - run ID for ensemble member start.time - beginning of model run (in POSIX format) stop.time - end of model run (in POSIX format) settings - pecan.SDA.xml settings object new.state - matrix from analysis of updated state variables with PEcAn names (dimensions: site rows by state variables columns) new.params - In the future, this will allow us to update parameters based on states (same format as write.configs) inputs - model specific inputs from split.inputs.MODEL used to run the model from start.time to stop.time RENAME - [optional] Flag used in write.restart.LINKAGES.R for development. 7.2.2.9 split.inputs.MODEL.R This model specific function gives the correct met and/or other model inputs to settings\\(run\\)inputs. This function returns settings\\(run\\)inputs to an inputs argument in sda.enkf.R. But, the inputs will not need to change for all models and should return settings\\(run\\)inputs unchanged if that is the case. settings - pecan.SDA.xml settings object start.time - start time for model run (in POSIX format) stop.time - stop time for model run (in POSIX format) 7.2.2.10 sample.IC.MODEL.R This model specific function is optional. But, it can be used to create initial condition matrix (IC) with # state variables columns by # ensemble rows. This IC matrix is used for the initial runs in sda.enkf.R in the write.configs.MODEL function. ne - number of ensemble members state - matrix of state variables to get initial conditions from year - used to determine which year to sample initial conditions from 7.2.2.11 Analysis Options There are four options depending on whether process variance is TRUE/FALSE and whether or not there is data or not. If there is no data and process variance = FALSE, there is no analysis step. If there is no data and process variance = TRUE, process variance is added to the forecast. If there is data and process variance = TRUE, the generalized ensemble filter is implemented with MCMC. If there is data and process variance = FALSE, the Kalman filter is used and solved analytically. 7.2.2.12 The Generalized Ensemble Filter An ensemble filter is a sequential data assimilation algorithm with two procedures at every time step: a forecast followed by an analysis. The forecast ensembles arise from a model while the analysis makes an adjustment of the forecasts ensembles from the model towards the data. An ensemble Kalman filter is typically suggested for this type of analysis because of its computationally efficient analytical solution and its ability to update states based on an estimate of covariance structure. But, in some cases, the ensemble Kalman filter fails because of filter divergence. Filter divergence occurs when forecast variability is too small, which causes the analysis to favor the forecast and diverge from the data. Models often produce low forecast variability because there is little internal stochasticity. Our ensemble filter overcomes this problem in a Bayesian framework by including an estimation of model process variance. This methodology also maintains the benefits of the ensemble Kalman filter by updating the state vector based on the estimated covariance structure. This process begins after the model is spun up to equilibrium. The likelihood function uses the data vector \\(\\left(\\boldsymbol{y_{t}}\\right)\\) conditional on the estimated state vector \\(\\left(\\boldsymbol{x_{t}}\\right)\\) such that \\(\\boldsymbol{y}_{t}\\sim\\mathrm{multivariate\\:normal}(\\boldsymbol{x}_{t},\\boldsymbol{R}_{t})\\) where \\(\\boldsymbol{R}_{t}=\\boldsymbol{\\sigma}_{t}^{2}\\boldsymbol{I}\\) and \\(\\boldsymbol{\\sigma}_{t}^{2}\\) is a vector of data variances. To obtain an estimate of the state vector \\(\\left(\\boldsymbol{x}_{t}\\right)\\), we use a process model that incorporates a process covariance matrix \\(\\left(\\boldsymbol{Q}_{t}\\right)\\). This process covariance matrix differentiates our methods from past ensemble filters. Our process model contains the following equations \\(\\boldsymbol{x}_{t} \\sim \\mathrm{multivariate\\: normal}(\\boldsymbol{x}_{model_{t}},\\boldsymbol{Q}_{t})\\) \\(\\boldsymbol{x}_{model_{t}} \\sim \\mathrm{multivariate\\: normal}(\\boldsymbol{\\mu}_{forecast_{t}},\\boldsymbol{P}_{forecast_{t}})\\) where \\(\\boldsymbol{\\mu}_{forecast_{t}}\\) is a vector of means from the ensemble forecasts and \\(\\boldsymbol{P}_{forecast_{t}}\\) is a covariance matrix calculated from the ensemble forecasts. The prior for our process covariance matrix is \\(\\boldsymbol{Q}_{t}\\sim\\mathrm{Wishart}(\\boldsymbol{V}_{t},n_{t})\\) where \\(\\boldsymbol{V}_{t}\\) is a scale matrix and \\(n_{t}\\) is the degrees of freedom. The prior shape parameters are updated at each time step through moment matching such that \\(\\boldsymbol{V}_{t+1} = n_{t}\\bar{\\boldsymbol{Q}}_{t}\\) \\(n_{t+1} = \\frac{\\sum_{i=1}^{I}\\sum_{j=1}^{J}\\frac{v_{ijt}^{2}+v_{iit}v_{jjt}}{Var(\\boldsymbol{\\bar{Q}}_{t})}}{I\\times J}\\) where we calculate the mean of the process covariance matrix \\(\\left(\\bar{\\boldsymbol{Q}_{t}}\\right)\\) from the posterior samples at time t. Degrees of freedom for the Wishart are typically calculated element by element where \\(v_{ij}\\) are the elements of \\(\\boldsymbol{V}_{t}\\). \\(I\\) and \\(J\\) index rows and columns of \\(\\boldsymbol{V}\\). Here, we calculate a mean number of degrees of freedom for \\(t+1\\) by summing over all the elements of the scale matrix \\(\\left(\\boldsymbol{V}\\right)\\) and dividing by the count of those elements \\(\\left(I\\times J\\right)\\). We fit this model sequentially through time in the R computing environment using R package ‘rjags.’ Users have control over how they think is the best way to estimate \\(Q\\). Our code will look for the tag q.type in the XML settings under state.data.assimilation which can take 4 values of Single, Site, Vector, or Wishart. If q.type is set to single then one value of process variance will be estimated across all different sites and variables. When q.type is set to Site then a process variance will be estimated for each siteat a cost of more time and computation power. When the q.type is set to Vector or Wishart then process errors for each variable of each site will be estimated and propagated through time, while the Wishart Q support the estimation of covariance between sites and variables through the MCMC sampling of wishart distributions, which further support the propagation of process error through not just time but space and variables. 7.2.2.13 Multi-site State data assimilation. sda.enkf.multisite is housed within: /pecan/modules/assim.sequential/R The 4-site tutorial is housed within: ~/pecan/modules/assim.sequential/inst/MultiSite-Exs 7.2.2.14 sda.enkf.multisite.R Description sda.enkf.multisite function allows for assimilation of observed data at multiple sites at the same time. In order to run a multi-site SDA, one needs to send a multisettings pecan xml file to this function. This multisettings xml file needs to contain information required for running at least two sites under run tag. The code will automatically run the ensembles for all the sites and reformats the outputs matching the required formats for analysis step. #### sda.enkf.multisite.R Arguments * settings - (required) State Data Assimilation Tags Example settings object obs.mean - (required) Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point. obs.cov - (required) Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point. Q - (optional) Process covariance matrix given if there is no data to estimate it. Default is NULL. restart - (optional) Used for iterative updating previous forecasts. Default NULL. List object includes file path to previous runs and start date for SDA. Default is NULL. pre_enkf_params - (optional) Used for carrying out SDA with pre-existed enkf.params, in which the Pf, aqq, and bqq can be used for the analysis step. Default is NULL. ensemble.samples - (optional) Pass ensemble.samples from outside, which are lists of calibrated parameters for different plant functional types. This also helps to avoid GitHub check issues. Default is NULL. control - (optional) List of flags controlling the behavior of the SDA. Here is an example of the control list. The detailed explanation of them are shown inside the sda.enkf.multisite function in the assim.sequential package. control=list(trace = TRUE, TimeseriesPlot = FALSE, debug = FALSE, pause = FALSE, Profiling = FALSE, OutlierDetection=FALSE, parallel_qsub = TRUE, send_email = NULL, keepNC = TRUE, run_parallel = TRUE, MCMC.args = NULL) 7.2.2.15 obs.mean and obs.cov Description The observations are required for passing the time points into the SDA workflow, which should match the start and end date in the settings object. For the generations of obs.mean and obs.cov, please use the function SDA_OBS_Assembler inside the assim.sequential package. For the unconstrained runs, please specify the free.run flag as TRUE inside the settings$state.data.assimilation$Obs_Prep section. Otherwise, please specify the arguments that are needed for preparations of different observations (note that, the observation preparation functions currently only support MODIS LAI, Landtrendr AGB, SMAP soil moisture, and SoilGrid soil organic carbon). For the details about how to setup those arguments, please reference the Create_Multi_settings.R script inside ~/pecan/modules/assim.sequential/inst/MultiSite-Exs/SDA directory. The observed mean and covariance need to be formatted as list of different dates with observations. For each element of this list also there needs to be a list with mean and cov matrices of different sites named by their siteid. In case that zero variance was estimated for a variable inside the obs.cov, the SDA code will automatically replace that with half of the minimum variance from other non-zero variables in that time step. Here are examples of the obs.mean and obs.cov for single time point, two sites, and two observations. > obs.mean $`2010/12/31` $`2010/12/31`$`1000000650` AbvGrndWood GWBI 111.502 1.0746 $`2010/12/31`$`1000000651` AbvGrndWood GWBI 114.302 1.574695 > obs.cov $`2010/12/31` $`2010/12/31`$`1000000650` [,1] [,2] [1,] 19.7821691 0.213584319 [2,] 0.5135843 0.005162113 $`2010/12/31`$`1000000651` [,1] [,2] [1,] 15.2821691 0.513584319 [2,] 0.1213583 0.001162113 7.2.2.16 Anlysis SDA workflow Before running the SDA analysis functions, the ensemble forecast results have to be generated, and arguments such as H matrix, MCMC arguments, and multi-site Y and R (by Construct.R function) have to be generated as well. Here are the workflows for three types of SDA analysis functions that we are currently used. Decide which analysis function to be used. Here we have three options: 1) traditional ensemble Kalman Filter (EnKF.MultiSite) with analytical solution, within which the process error needs to be prescribed from outside (see Q arguments in the sda.enkf.multisite function); 2) generalized ensemble Kalman Filter (GEF.MultiSite); and 3) block-based generalized ensemble Kalman Filter (analysis_sda_block). The latter two methods support the feature of propagating process variance across space and time. To choose the analysis method 1, we need to set the process.variance as FALSE. Otherwise, if we set the process.variance as TRUE and provide the q.type as either SINGLE or SITE the method GEF.MultiSite will be used, and if we provide the q.type as either vector or wishart the method analysis_sda_block will be used. The explanations for different Q types can be found in the The Generalized Ensemble Filter section in this documentation. For the analysis_sda_block method, there is also a special case for complete or partial missing of observations. If we decide to use EnKF.MultiSite, then the analysis results will be calculated based on equations. If we decide to use GEF.MultiSite, then it will first do censoring process based on how you setup the censored.data flag within settings xml file. Then, if t equals to 1, we will first initialize the aqq, bqq, aq, and bq based on how you setup the q.type argument within settings xml file. After preparing the initial conditions (ICs), data, and constants for the GEF.MultiSite.Nimble nimble model, the MCMC sampling will happen afterwards. Finally, the process variance and the analysis results will be calculated and updated and returned to the sda.enkf.multisite function. If we decide to use analysis_sda_block method, then if t equals 1, the workflow will first build blocks using the matrix_network function for the calculations for indexes of variables by networks of site groups based on the spatial scale (see scalef argument in the Example of multi-settings pecan xml file section) that we specify inside the state.data.assimilation section. Then, the workflow will execute the build.block.xy to automatically initialize the overall block-based MCMC lists (block.list.all) and fill in datasets (mu.f, y.censored, or Pf) for each block and for all time points to facilitate the process of passing arguments between scripts/functions. After that, the MCMC_args (see the explanations inside the roxygen structure of the sda.enkf.multisite function) will be specified either from the control list or by default (see below). Then, the workflow will update the process error using the update_q function. If t equals 1, it will initialize the arguments for the process error. Otherwise, it will grab the previously updated process error. After that, in the MCMC_Init function, it will create the initial conditions (ICs) for the MCMC sampling based on randomly sampled data. Then, the completed block-based arguments (block.list.all) will be used by the MCMC_block_function function under the parallel mode. Finally, the block-based results will be converted into long vectors and large matrices by the block.2.vector function and values such as block.list.all, mu.f, mu.a, Pf, Pa will be returned to the sda.enkf.multisite function. MCMC.args <- list(niter = 1e5, nthin = 10, nchain = 3, nburnin = 1e4) There is a special case for the analysis_sda_block method where NA values appear in the observations, which provides the opportunity for the estimations of process error without any observations. This special case currently is only working under restricted conditions when we set the scalef as 0 (that feature currently only works for isolated-site SDA runs) and turn on the free.run flag in the settings, which will then automatically fill in NA values to the observations by each site (see bellow). <state.data.assimilation> <scalef>0</scalef> <free.run>TRUE</free.run> </state.data.assimilation> 7.2.2.17 Example of multi-settings pecan xml file Here is an example of what does a multi-settings pecan xml file look like. The detailed explanation for the xml file can be found under the Multi-Settings section in the 03_pecan_xml.Rmd documentation. <?xml version="1.0"?> <pecan.multi> <state.data.assimilation> <process.variance>TRUE</process.variance> <aqq.Init>1</aqq.Init> <bqq.Init>1</bqq.Init> <sample.parameters>FALSE</sample.parameters> <adjustment>TRUE</adjustment> <censored.data>FALSE</censored.data> <FullYearNC>TRUE</FullYearNC> <NC.Overwrite>FALSE</NC.Overwrite> <NC.Prefix>sipnet.out</NC.Prefix> <q.type>SINGLE</q.type> <free.run>FALSE</free.run> <Localization.FUN>Local.support</Localization.FUN> <scalef>1</scalef> <chains>5</chains> <state.variables> <variable> <variable.name>AbvGrndWood</variable.name> <unit>MgC/ha</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>LAI</variable.name> <unit></unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SoilMoistFrac</variable.name> <unit></unit> <min_value>0</min_value> <max_value>100</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>kg/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> </state.variables> <Obs_Prep> <Landtrendr_AGB> <AGB_input_dir>/projectnb/dietzelab/dongchen/Multi-site/download_500_sites/AGB</AGB_input_dir> <allow_download>TRUE</allow_download> <export_csv>TRUE</export_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </Landtrendr_AGB> <MODIS_LAI> <search_window>30</search_window> <export_csv>TRUE</export_csv> <run_parallel>TRUE</run_parallel> <timestep> <unit>year</unit> <num>1</num> </timestep> </MODIS_LAI> <SMAP_SMP> <search_window>30</search_window> <export_csv>TRUE</export_csv> <update_csv>FALSE</update_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </SMAP_SMP> <Soilgrids_SoilC> <timestep> <unit>year</unit> <num>1</num> </timestep> </Soilgrids_SoilC> <outdir>/projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS</outdir> <start.date>2012-07-15</start.date> <end.date>2021-07-15</end.date> </Obs_Prep> <spin.up> <start.date>2004/01/01</start.date> <end.date>2006/12/31</end.date> </spin.up> <forecast.time.step>1</forecast.time.step> <start.date>2004/01/01</start.date> <end.date>2006/12/31</end.date> </state.data.assimilation> <info> <notes></notes> <userid>-1</userid> <username></username> <date>2017/12/06 21:19:33 +0000</date> </info> <outdir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768</outdir> <database> <bety> <user>bety</user> <password>bety</password> <host>128.197.168.114</host> <dbname>bety</dbname> <driver>PostgreSQL</driver> <write>false</write> </bety> <dbfiles>/fs/data1/pecan.data/dbfiles/</dbfiles> </database> <pfts> <pft> <name>temperate.deciduous_SDA</name> <constants> <num>2</num> </constants> <outdir>/fs/data2/output//PEcAn_1000008768/pft/temperate.deciduous_SDA</outdir> <posteriorid>1000008552</posteriorid> </pft> </pfts> <meta.analysis> <iter>3000</iter> <random.effects> <on>FALSE</on> <use_ghs>TRUE</use_ghs> </random.effects> </meta.analysis> <ensemble> <size>20</size> <ensemble.id>1000016146</ensemble.id> <start.year>1995</start.year> <end.year>1999</end.year> <samplingspace> <parameters> <method>uniform</method> </parameters> <met> <method>sampling</method> </met> <soil> <parent>parameters</parent> </soil> <vegetation> <parent>soil</parent> </vegetation> </samplingspace> </ensemble> <model> <id>1000000022</id> <default.param>/fs/data3/hamzed/output/paleon_sda_SIPNET-8768/Bartlett.param</default.param> <type>SIPNET</type> <revision>r136</revision> <delete.raw>FALSE</delete.raw> <binary>/fs/data5/pecan.models/SIPNET/trunk/sipnet_ssr</binary> </model> <workflow> <id>1000008768</id> </workflow> <run> <settings.1000000650> <site> <id>1000000650</id> <met.start>1960/01/01</met.start> <met.end>1965/12/31</met.end> <name>Harvard Forest - Lyford Plots (PalEON PHA)</name> <lat>42.53</lat> <lon>-72.18</lon> </site> <inputs> <met> <source>CRUNCEP</source> <output>SIPNET</output> <path> <path1>/fs/data1/pecan.data/dbfiles/CRUNCEP_SIPNET_site_0-758/CRUNCEP.1960-01-01.2010-12-31.clim</path1> </path> </met> </inputs> <start.date>1960/01/01</start.date> <end.date>1980/12/31</end.date> </settings.1000000650> <settings.1000000651> <site> <id>1000000651</id> <met.start>1960/01/01</met.start> <met.end>1965/12/31</met.end> <name>Harvard Forest - Lyford Plots (PalEON PHA)</name> <lat>42.53</lat> <lon>-72.18</lon> </site> <inputs> <met> <source>CRUNCEP</source> <output>SIPNET</output> <path> <path1>/fs/data1/pecan.data/dbfiles/CRUNCEP_SIPNET_site_0-758/CRUNCEP.1960-01-01.2010-12-31.clim</path1> </path> </met> </inputs> <start.date>1960/01/01</start.date> <end.date>1980/12/31</end.date> </settings.1000000651> </run> <host> <name>localhost</name> <rundir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768/run</rundir> <outdir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768/out</outdir> </host> <settings.info> <deprecated.settings.fixed>TRUE</deprecated.settings.fixed> <settings.updated>TRUE</settings.updated> <checked>TRUE</checked> </settings.info> <rundir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768/run</rundir> <modeloutdir>/fs/data3/hamzed/output/MultiSite_Sandbox/paleon_sda_SIPNET-8768/out</modeloutdir> <multisettings>run</multisettings> </pecan.multi> 7.2.3 Running SDA on remote In general, the idea is that sending, running and monitoring an SDA job should all be done using two functions (SDA_remote_launcher and Remote_Sync_launcher). SDA_remote_launcher checks the XML settings defining the run, sets up the SDA on the remote machine, and then sends a qusb command for running the job. Remote_Sync_launcher, on the other hand, sits on the local machine and monitors the progress of the job(s) and brings back the outputs as long as the job is running. SDA_remote_launcher sets up the job by copying a template SDA workflow R script and a bash file template that are ready for submission to the remote machine. This function checks the paths to all inputs including met, soil, site_pft and etc., testing whether they exists on the remote machine or not. If they do not exist, the function copies the missing paths over and replaces the settings accordingly. After submitting the bash script, the function returns the PID of the job writing the log file, allowing the Remote_Sync_launcher to monitor the progress of the run, checks to see if the job is still alive, and determines if sda.output.rdata has been updated since the last check or not. Additionally, the Remote_Sync_launcher function follows the progress of the remote job by executing a nohup command on a template R script and keeps the R console open for further use. This R script, as mentioned above, constantly pings the given PID every 5 minutes and copies over the SDA output. Several points on how to prepare your xml settings for the remote SDA run: 1 - In the main pecan workflow.R, if you were able to generate pecan.TRAIT.xml, your settings are ready to be used for an SDA run. All you need to add is your state data assimilation tags. 2 - Inside the xml setting an <outdir> flag needs to be included and point to a local directory where SDA_remote_launcher will look for either a sample.Rdata file or a pft folder. 3 - You need to set your <host> tag according to the desired remote machine. You can learn more about this on the Remote execution with PEcAn section of the documentation. Please make sure that the <folder> tag inside <host> is pointing to a directory where you would like to store and run your SDA job(s). 4 - Finally, make sure the tag inside the tag is set to the correct path on the remote machine. 7.2.4 Restart functionality in SDA If you prefer to run your SDA analysis in multiple stages, where each phase picks up where the previous one left off, you can use the restart argument in the sda.enkf.multisite function. You need to make sure that the output from previous step exists in the SDA folder (in the outfolder), and the <start.date> is the same as <end.date> from the previous step. When you run the SDA with the restart parameter, it will load the output from the previous step and use configs already written in the run folder to set itself up for the next step. Using the restart argument could be as easy as : sda.enkf.multisite(settings, obs.mean =obs.mean , obs.cov = obs.cov, control = SDA.arguments(debug = FALSE, TimeseriesPlot = FALSE), restart = FALSE ) Where the new settings, obs.mean and obs.cov contain the relevant information for the next phase. 7.2.5 State Data Assimilation Methods By Ann Raiho Our goal is build a fully generalizable state data assimilation (SDA) workflow that will assimilate multiple types of data and data products into ecosystem models within PEcAn temporally and spatially. But, during development, specifically with PalEON goals in mind, we have been focusing on assimilating tree ring estimated NPP and AGB and pollen derived fractional composition into two ecosystem models, SIPNET and LINKAGES, at Harvard Forest. This methodology will soon be expanded to include the PalEON sites listed on the state data assimilation wiki page. 7.2.5.1 Data Products During workflow development, we have been working with tree ring estimated NPP and AGB and pollen derived fractional composition data products. Both of these data products have been estimated with a full accounting of uncertainty, which provides us with state variable observation mean vector and covariance matrix at each time step. These data products are discussed in more detail below. Even though we have been working with specific data products during development, our workflow is generalizable to alternative data products as long as we can calculate a state variable observation mean vector and covariance for a time point. 7.2.5.2 Tree Rings We have been primarily been working with the tree ring data product created by Andria Dawson and Chris Paciorek and the PEcAn tree ring allometry module. They have developed a Bayesian model that estimates annual aboveground biomass increment (Mg/ha/yr) and aboveground biomass (Mg/ha) for each tree in a dataset. We obtain this data and aggregate to the level appropriate for the ecosystem model. In SIPNET, we are assimilating annual gross woody increment (Mg/ha/yr) and above ground woody biomass (Mg/ha). In LINKAGES, we are assimilating annual species biomass. More information on deriving these tree ring data products can be found in Dawson et al 201?. We have been working mostly with tree data collected at Harvard Forest. Tree rings and census data were collected at Lyford Plot between 1960 and 2010 in three separate plots. Other tree ring data will be added to this analysis in the future from past PEON courses (UNDERC), Kelly Heilman (Billy’s Lake and Bigwoods), and Alex Dye (Huron Mt. Club). 7.2.5.3 Pollen STEPPS is a Bayesian model developed by Paciorek and McLachlan 2009 and Dawson et al 2016 to estimate spatially gridded fractional composition from fossil pollen. We have been working with STEPPS1 output, specifically with the grid cell that contains Harvard Forest. The temporal resolution of this data product is centennial. Our workflow currently operates at annual time steps, but does not require data at every time step. So, it is possible to assimilate fractional composition every one hundred years or to assimilate fractional composition data every year by accounting for variance inflation. In the future, pollen derived biomass (ReFAB) will also be available for data assimilation. Although, we have not discussed how STEPPS and ReFAB data assimilation will work. 7.2.5.4 Variance Inflation *Side Note: Probably want to call this something else now. Since the fractional composition data product has a centennial resolution, in order to use fractional composition information every year we need to change the weight the data has on the analysis. The basic idea is to downweight the likelihood relative to the prior to account for (a) the fact that we assimilate an observation multiple times and (b) the fact that the number of STEPPS observations is ‘inflated’ because of the autocorrelation. To do this, we take the likelihood and raise it to the power of (1/w) where ‘w’ is an inflation factor. w = D * (N / ESS) where D is the length of the time step. In our case D = 100. N is the number of time steps. In our case N = 11. and ESS is the effective sample size. The ESS is calculated with the following function where ntimes is the same as N above and sims is a matrix with the dimensions number of MCMC samples by number of state variables. ESS_calc <- function(ntimes, sims){ # center based on mean at each time to remove baseline temporal correlation # (we want to estimate effective sample size effect from correlation of the errors) row.means.sims <- sims - rowMeans(sims) # compute all pairwise covariances at different times covars <- NULL for(lag in 1:(ntimes-1)){ covars <- c(covars, rowMeans(row.means.sims[(lag+1):ntimes, , drop = FALSE] * row.means.sims[1:(ntimes-lag), , drop = FALSE])) } vars <- apply(row.means.sims, 1, var) # pointwise post variances at each time, might not be homoscedastic # nominal sample size scaled by ratio of variance of an average # under independence to variance of average of correlated values neff <- ntimes * sum(vars) / (sum(vars) + 2 * sum(covars)) return(neff) } The ESS for the STEPPS1 data product is 3.6, so w in our assimilation of fractional composition at Harvard Forest will be w = 305.6. 7.2.5.5 Current Models SIPNET and LINKAGES are the two ecosystem models that have been used during state data assimilation development within PEcAn. SIPNET is a simple ecosystem model that was built for… LINKAGES is a forest gap model created to simulate the process of succession that occurs when a gap is opened in the forest canopy. LINKAGES has 72 species level plant functional types and the ability to simulate some below ground processes (C and N cycles). 7.2.5.6 Model Calibration Without model calibration both SIPNET and LINKAGES make incorrect predictions about Harvard Forest. To confront this problem, SIPNET and LINKAGES will both be calibrated using data collected at the Harvard Forest flux tower. Istem has completed calibration for SIPNET using a parameter data assimilation emulator contained within the PEcAn workflow. LINKAGES will also be calibrated using this method. This method is also generalizable to other sites assuming there is data independent of data assimilation data available to calibrate against. 7.2.5.7 Initial Conditions The initial conditions for SIPNET are sampled across state space based on data distributions at the time when the data assimilation will begin. We do not sample LINAKGES for initial conditions and instead perform model spin up for 100 years prior to beginning data assimilation. In the future, we would like to estimate initial conditions based on data. We achieve adequate spread in the initial conditions by allowing the parameters to vary across ensemble members. 7.2.5.8 Drivers We are currently using Global Climate Model (GCM) drivers from the PaLEON model intercomparison. Christy Rollinson and John Tipton are creating MET downscaled GCM drivers for the Paleon data assimilation sites. We will use these drivers when they are available because they are a closer representation of reality. 7.2.5.9 Sequential State Data Assimilation We are using sequential state data assimilation methods to assimilate paleon data products into ecosystem models because less computation power is required for sequential state data assimilation than for particle filter methods. 7.2.5.10 General Description The general sequential data assimilation framework consists of three steps at each time step: 1. Read the state variable output for time t from the model forecast ensembles and save the forecast mean (muf) and covariance (Pf). 2. If there are data mean (y) and covariance (R) at this time step, perform data assimilation analysis (either EnKF or generalized ensemble filter) to calculate the new mean (mua) and covariance (Pa) of the state variables. 3. Use mua and Pa to restart and run the ecosystem model ensembles with new state variables for time t+1. 7.2.5.11 EnKF There are two ways to implement sequential state data assimilation at this time. The first is the Ensemble Kalman Filter (EnKF). EnKF has an analytical solution, so the kalman gain, analysis mean vector, and analysis covariance matrix can be calculated directly: K <- Pf %*% t(H) %*% solve((R + H %*% Pf %*% t(H))) ## Kalman Gain mu.a <- mu.f + K %*% (Y - H %*% mu.f) # Analysis mean vector Pa <- (diag(ncol(X)) - K %*% H) %*% Pf # Analysis covariance matrix The EnKF is typically used for sequential state data assimilation, but we found that EnKF lead to filter divergence when combined with our uncertain data products. Filter divergence led us to create a generalized ensemble filter that estimates process variance. 7.2.5.12 Generalized Ensemble Filter The generalized ensemble filter follows generally the three steps of sequential state data assimilation. But, in the generalized ensemble filter we add a latent state vector that accounts for added process variance. Furthermore, instead of solving the analysis analytically like the EnKF, we have to estimate the mean analysis vector and covariance matrix with MCMC. 7.2.5.13 Mapping Ensemble Output to Tobit Space There are some instances when we have right or left censored variables from the model forecast. For example, a model estimating species level biomass may have several ensemble members that produce zero biomass for a given species. We are considering this case a left censored state variable that needs to be mapped to normal space using a tobit model. We do this by creating two matrices with dimensions number of ensembles by state variable. The first matrix is a matrix of indicator variables (y.ind), and the second is a matrix of censored variables (y.censored). When the indicator variable is 0 the state variable (j) for ensemble member (i) is sampled. This allows us to impute a normal distribution for each state variable that contains ‘missing’ forecasts or forecasts of zero. tobit2space.model <- nimbleCode({ for(i in 1:N){ y.censored[i,1:J] ~ dmnorm(muf[1:J], cov = pf[1:J,1:J]) for(j in 1:J){ y.ind[i,j] ~ dconstraint(y.censored[i,j] > 0) } } muf[1:J] ~ dmnorm(mean = mu_0[1:J], cov = pf[1:J,1:J]) Sigma[1:J,1:J] <- lambda_0[1:J,1:J]/nu_0 pf[1:J,1:J] ~ dinvwish(S = Sigma[1:J,1:J], df = J) }) 7.2.5.14 Generalized Ensemble Filter Model Description Below is the BUGS code for the full analysis model. The forecast mean an covariance are calculated from the tobit2space model above. We use a tobit likelihood in this model because there are instances when the data may be left or right censored. Process variance is included by adding a latent model state (X) with a process precision matrix (q). We update our prior on q at each time step using our estimate of q from the previous time step. tobit.model <- nimbleCode({ q[1:N,1:N] ~ dwish(R = aq[1:N,1:N], df = bq) ## aq and bq are estimated over time Q[1:N,1:N] <- inverse(q[1:N,1:N]) X.mod[1:N] ~ dmnorm(muf[1:N], prec = pf[1:N,1:N]) ## Model Forecast ##muf and pf are assigned from ensembles ## add process error X[1:N] ~ dmnorm(X.mod[1:N], prec = q[1:N,1:N]) #agb linear #y_star[1:YN,1:YN] <- X[1:YN,1:YN] #[choose] #f.comp non linear #y_star[1:YN] <- X[1:YN] / sum(X[1:YN]) ## Analysis y.censored[1:YN] ~ dmnorm(X[1:YN], prec = r[1:YN,1:YN]) #is it an okay assumpution to just have X and Y in the same order? #don't flag y.censored as data, y.censored in inits #remove y.censored samplers and only assign univariate samplers on NAs for(i in 1:YN){ y.ind[i] ~ dconstraint(y.censored[i] > 0) } }) 7.2.5.15 Ensemble Adjustment Each ensemble member has a different set of species parameters. We adjust the updated state variables by using an ensemble adjustment. The ensemble adjustment weights the ensemble members based on their likelihood during the analysis step. S_f <- svd(Pf) L_f <- S_f$d V_f <- S_f$v ## normalize Z <- X*0 for(i in seq_len(nrow(X))){ Z[i,] <- 1/sqrt(L_f) * t(V_f)%*%(X[i,]-mu.f) } Z[is.na(Z)]<-0 ## analysis S_a <- svd(Pa) L_a <- S_a$d V_a <- S_a$v ## analysis ensemble X_a <- X*0 for(i in seq_len(nrow(X))){ X_a[i,] <- V_a %*%diag(sqrt(L_a))%*%Z[i,] + mu.a } 7.2.5.16 Diagnostics There are three diagnostics we have currently implemented: time series, bias time series, and process variance. The time series diagnostics show the data, forecast, and analysis time series for each state variable. These are useful for visually assessing variance and magnitude of change of state variables through time. These time series are also updated throughout the analysis and are also created as a pdf at the end of the SDA workflow. There are two types of bias time series the first assess the bias in the update (the forecast minus the analysis) and the second assess the bias in the error (the forecast minus the data). These bias time series are useful for identifying which state variables have intrinsic bias within the model. For example, if red oak biomass in LINKAGES increases at every time step (the update and the error are always positive), this would suggest that LINKAGES has a positive growth or recruitment bias for red oak. Finally, when using the generalized ensemble filter to estimate process variance, there are two additional plots to assess estimation of process variance. The first is a correlation plot of the process covariance matrix. This tells us what correlations are incorrectly represented by the model. For example, if red oak biomass and white pine biomass are highly negatively correlated in the process covariance matrix, this means that the model either 1) has no relationship between red oak and white pine and they should affect each other negatively or 2) there is a positive relationship between red oak and white pine and there shouldn’t be any relationship. We can determine which of these is true by comparing the process covariance matrix to the model covariance matrix. The second process variance diagnostic plot shows how the degrees of freedom associated with estimating the process covariance matrix have changed through time. This plot should show increasing degrees of freedom through time. 7.2.6 MultiSettings (TODO: Under construction…) 7.2.7 Benchmarking Benchmarking is the process of comparing model outputs against either experimental data or against other model outputs as a way to validate model performance. We have a suit of statistical comparisons that provide benchmarking scores as well as visual comparisons that help in diagnosing data-model and/or model-model differences. 7.2.7.1 Data Preparation All data that you want to compare with model runs must be registered in the database. This is currently a step that must be done by hand either from the command line or through the online BETY interface. The data must have three records: An input record (Instructions here) A database file record (Instructions here) A format record (Instructions here) 7.2.7.2 Model Runs Model runs can be setup and executed - Using the PEcAn web interface online or with a VM (see setup) - By hand using the pecan.xml 7.2.7.3 The Benchmarking Shiny App The entire benchmarking process can be done through the Benchmarking R Shiny app. When the model run has completed, navigate to the workflow visualization Shiny app. Load model data Select the workflow and run id Make sure that your model output is loading properly (i.e. you can see plots of your data) Load benchmarking data Again make sure that you can see the uploaded data plotted alongside the model output. In the future there will be more tools for double checking that your uploaded data is appropriate for benchmarking, but for now you may need to do the sanity checks by hand. 7.2.7.4 Create a reference run record Navigate to the Benchmarking tab The first step is to register the new model run as a reference run in the database. Benchmarking cannot be done before this step is completed. When the reference run record has been created, additional menus for benchmarking will appear. 7.2.7.5 Setup Benchmarks and metrics From the menus select The variables in the uploaded data that you wish to compare with model output. The numerical metrics you would like to use in your comparison. Additional comparison plots that you would like to see. Note: All these selections populate the benchmarking section of the pecan.BENCH.xml which is then saved in the same location as the original run output. This xml is purely for reference. 7.2.7.5.1 Benchmarking Output All benchmarking results are stored in the benchmarking directory which is created in the same folder as the original model run. The benchmaking directory contains subdirectories for each of the datasets compared with the model output. The names of these directories are the same as the corresponding data set’s input id in BETY. Each input directory contains benchmarking.output.Rdata, an Rdata file contianing all the results of the benchmarking workflow. load(benchmarking.output.Rdata) loads a list called result.out which contains the following: bench.results: a data frame of all numeric benchmarking scores format: a data frame that can be used to see how the input data was transformed to make it comparable to the model output. This involves converting from the original variable names and units to the internal pecan standard. aligned.dat: a data frame of the final aligned model and input values. All plots are saved as pdf files with names with “benchmark_plot-type_variable_input-id.pdf” To view interactive results, naviage to the Benchmarking Plots tab in the shiny app. 7.2.7.6 Benchmarking in pecan.xml Before reading this section, it is recommended that you familiarize yourself with basics of the pecan.xml file. The pecan.xml has an optional benchmarking section. Below are all the tags in the benchmarking section explained. Many of these field are filled in automatically during the benchmarking process when using the benchmarking shiny app. The only time one should edit the benchmarking section by hand is for performing clone runs. See clone run documentation. <benchmarking> settings: ensemble_id: the id of the ensemble that you will be using - the settings from this ensemble will be saved in a reference run record and then ensemble_id will be replaced with reference_run_id new_run: TRUE = create new run, FALSE = use existing run (required, default FALSE) It is possible to look at more than one benchmark with a particular run. The specific settings related to each benchmark are in a sub section called benchmark input_id: the id of the benchmarking data (required) variable_id: the id of the variable of interest within the data. If you leave this blank, all variables that are shared between the input and model output will be used. metric_id: the id(s) of the metric(s) to be calculated. If you leave this blank, all metrics will be used. Example: In this example, - we are using a pre-existing run from ensemble_id = 1000010983 (new_run = FALSE) - the output will be compared to data from input_id = 1000013743, specifically two variables of interest: variable_id = 411, variable_id = 18 - for variable_id = 411 we will perform only one metric of comparison metric_id = 1000000001 - for for variable_id = 18 we will perform two metrics of comparison metric_id = 1000000001, metric_id = 1000000002 <benchmarking> <ensemble_id>1000010983</ensemble_id> <new_run>FALSE</new_run> <benchmark> <input_id>1000013743</input_id> <variable_id>411</variable_id> <site_id>853</site_id> <metrics> <metric_id>1000000001</metric_id> </metrics> </benchmark> <benchmark> <input_id>1000013743</input_id> <variable_id>18</variable_id> <site_id>853</site_id> <metrics> <metric_id>1000000001</metric_id> <metric_id>1000000002</metric_id> </metrics> </benchmark> </benchmarking> "],["developer-guide.html", "8 Developer guide", " 8 Developer guide Update BETY Update PEcAn Code PEcAn and Git Coding Practices "],["updatebety.html", "8.1 Updating PEcAn Code and Bety Database", " 8.1 Updating PEcAn Code and Bety Database Release notes for all releases can be found here. This page will only list any steps you have to do to upgrade an existing system. When updating PEcAn it is highly encouraged to update BETY. You can find instructions on how to do this, as well on how to update the database in the Updating BETYdb gitbook page. 8.1.1 Updating PEcAn The latest version of PEcAn code can be obtained from the PEcAn repository on GitHub: cd pecan # If you are not already in the PEcAn directory git pull The PEcAn build system is based on GNU Make. The simplest way to install is to run make from inside the PEcAn directory. This will update the documentation for all packages and install them, as well as all required dependencies. For more control, the following make commands are available: make document – Use devtools::document to update the documentation for all package. Under the hood, this uses the roxygen2 documentation system. make install – Install all packages and their dependnencies using devtools::install. By default, this only installs packages that have had their code changed and any dependent packages. make check – Perform a rigorous check of packages using devtools::check make test – Run all unit tests (based on testthat package) for all packages, using devtools::test make clean – Remove the make build cache, which is used to track which packages have changed. Cache files are stored in the .doc, .install, .check, and .test subdirectories in the PEcAn main directory. Running make clean will force the next invocation of make commands to operate on all PEcAn packages, regardless of changes. The following are some additional make tricks that may be useful: Install, check, document, or test a specific package – make .<cmd>/<pkg-dir>; e.g. make .install/utils or make .check/modules/rtm Force make to run, even if package has not changed – make -B <command> Run make commands in parallel – make -j<ncores>; e.g. make -j4 install to install packages using four parallel processes. Note that packages containing compiled code (e.g. PEcAn.RTM, PEcAn.BASGRA) might fail when j is greater than 1, because of limitations in the way R calls make internally while compiling them. See GitHub issue 1976 for more details. All instructions for the make build system are contained in the Makefile in the PEcAn root directory. For full documentation on make, see the man pages by running man make from a terminal. "],["pecan-git.html", "8.2 Git and GitHub Workflow", " 8.2 Git and GitHub Workflow Using Git 8.2.1 Using Git This document describes the steps required to download PEcAn, make changes to code, and submit your changes. If you are new to GitHub or to PEcAn, start with the one-time set-up instructions under Before any work is done. Also see the excellent tutorials and references in the Git) section right below this list and at the bootom in References. To make trivial changes, see [Quick and Easy]. To make a few changes to the code, start with the [Basic Workflow]. To make substantial changes and/or if plan to contribute over time see [Recommended Workflow: A new branch for each change]. 8.2.1.1 Git Git is a free & open source, distributed version control system designed to handle everything from small to very large projects with speed and efficiency. Every Git clone is a full-fledged repository with complete history and full revision tracking capabilities, not dependent on network access or a central server. Branching and merging are fast and easy to do. A good place to start is the GitHub 5 minute illustrated tutorial. In addition, there are three fun tutorials for learning git: Learn Git is a great web-based interactive tutorial. LearnGitBranching TryGit. URLs In the rest of the document will use specific URL’s to clone the code. There a few URL’s you can use to clone a project, using https, ssh and git. You can use either https or git to clone a repository and write to it. The git protocol is read-only. This document describes the steps required to download PEcAn, make changes to code, and submit your changes. If during above process you want to work on something else, commit all your code, create a new branch, and work on new branch. 8.2.1.2 PEcAn Project and Github Organization Repository: https://github.com/organizations/PecanProject PEcAn source code: https://github.com/PecanProject/pecan.git BETYdb source code: https://github.com/PecanProject/bety.git These instructions apply to other repositories too. 8.2.1.3 PEcAn Project Branches We follow branch organization laid out on this page. In short, there are three main branches you must be aware of: develop - Main Branch containing the latest code. This is the main branch you will make changes to. master - Branch containing the latest stable code. DO NOT MAKE CHANGES TO THIS BRANCH. release/vX.X.X - Named branches containing code specific to a release. Only make changes to this branch if you are fixing a bug on a release branch. 8.2.1.4 Milestones, Issues, Tasks The Milestones, issues, and tasks can be used to organize specific features or research projects. In general, there is a heirarchy: milestones (Big picture, “Epic”): contains many issues, organized by release. issues (Specific features / bugs, “Story”): may contain a list of tasks; represent task list (to do list, “Tasks”): list of steps required to close an issue, e.g.: * [ ] first do this * [ ] then this * [ ] completed when x and y 8.2.1.5 Editing files on GitHub The easiest approach is to use GitHub’s browser based workflow. This is useful when your change is a few lines, if you are editing a wiki, or if the edit is trivial (and won’t break the code). The GitHub documentation is here but it is simple: finding the page or file you want to edit, click “edit” and then the GitHub web application will automatically forking and branch, then allow you to submit a pull request. However, it should be noted that unless you are a member of the PEcAn project that the “edit” button will not be active and you’ll want to follow the workflow described below for forking and then submitting a pull request. 8.2.2 Recommended Git Workflow Summary: development should occur on a fork of the main repository. Fork Create Branch Develop Push changes to your fork Create pull request from branch on your fork to develop branch on pecanproject/pecan Each feature should be in its own branch (for example each issue is a branch, names of branches are often the issue in a bug tracking system). Commit and Push Frequency On your branch, commit any time that you have done work that you do not want to re-do. Remember, pushing changes to your branch is like saving a draft. Submit a pull request when you are done. 8.2.2.1 Before any work is done The first step below only needs to be done once when you first start working on the PEcAn code. The steps below that need to be done to set up PEcAn on your computer, and would need to be repeated if you move to a new computer. All contributors should create a fork of the PEcAn source code in their own folder see github help: “fork a repo”). This forked repository will allow you to create branches and submit these changes back to GitHub using pull requests to the develop branch of PEcAn. The pull request will start a review process that will eventually result in the code being merged into the main copy of the codebase. See https://help.github.com/articles/fork-a-repo for more information, especially on how to keep your fork up to date with respect to the original. (Rstudio users should also see Git + Rstudio, below). You can setup SSH keys to make it easier to commit cod back to GitHub. This might especially be true if you are working from a cluster, see set up ssh keys There is a script in the scripts folder called scripts/syncgit.sh that will keep your fork in sync with the main pecanproject repository. Introduce yourself to GIT git config --global user.name "FULLNAME" git config --global user.email you@yourdomain.example.com Fork PEcAn on GitHub. Go to the PEcAn source code and click on the Fork button in the upper right. This will create a copy of PEcAn in your personal space. Clone to your local machine via command line git clone git@github.com:<username>/pecan.git Define PEcAnProject/pecan as upstream repository cd pecan git remote add upstream git@github.com:PecanProject/pecan.git 8.2.2.1.1 Hint: Keeping your fork in sync If you have used the instructions above, you can use the helper script called scripts/syncgit.sh to keep the master and develop branches of your own fork in sync with the PEcAnProject/pecan repository. After following the above, your .git/config file will include the following: ... [remote "origin"] url = git@github.com:<username>/pecan.git fetch = +refs/heads/*:refs/remotes/origin/* [branch "develop"] remote = origin merge = refs/heads/develop [remote "upstream"] url = git@github.com:PecanProject/pecan.git fetch = +refs/heads/*:refs/remotes/upstream/* Then, you can run: ./scripts/syncgit.sh Now the master and develop branches on your fork will be up to date. 8.2.2.2 Using Branching Ideally, a developer should create a new branch for each feature or bug fix Make sure you start in the develop branch git checkout develop Make sure develop is up to date git pull upstream develop Run the PEcAn MAKEFILE to compile code from the main directory. make Create a new branch and switch to it git checkout -b <branchname> Work/commit/etc git add <file_that_was_changed.R> git commit -m "<some descriptive information about what was done>" Make sure that code compiles and documentation updated. The make document command will run roxygenise. make document make Push this branch to your github space git push origin <branchname> submit pull request with [[link commits to issues|Using-Git#link-commits-to-issuess]]; also see github documentation 8.2.2.3 After pull request is merged Make sure you start in master git checkout develop` delete branch remotely git push origin --delete <branchname>` delete branch locally git branch -D <branchname>` 8.2.2.4 Link commits to issues You can reference and close issues from comments, pull requests, and commit messages. This should be done when you commit code that is related to or will close/fix an existing issue. There are two ways to do this. One easy way is to include the following text in your commit message: Github to close: “closes gh-xxx” (or syn. close, closed, fixes, fix, fixed) to reference: just the issue number (e.g. “gh-xxx”) 8.2.3 Useful Git tools 8.2.3.1 GitHub Desktop The easiest way to get working with GitHub is by installing the GitHub client. For instructions for your specific OS and download of the GitHub client, see https://help.github.com/articles/set-up-git. This will help you set up an SSH key to push code back to GitHub. To check out a project you do not need to have an ssh key and you can use the https or git url to check out the code. 8.2.3.2 Git + Rstudio Rstudio is nicely integrated with many development tools, including git and GitHub. It is quite easy to check out source code from within the Rstudio program or browser. The Rstudio documentation includes useful overviews of version control and R package development. Once you have git installed on your computer (see the Rstudio version control documentation for instructions), you can use the following steps to install the PEcAn source code in Rstudio. 8.2.4 Advanced 8.2.4.1 Fixing a release Branch If you would like to make changes to a release branch, you must follow a different workflow, as the release branch will not contain the latest code on develop and must remain seperate. Fetch upstream remote branches git fetch upstream Checkout the correct release branch git checkout -b release/vX.Y.Z Compile Code with make make Make changes and commit them git add <changed_file.R> git commit -m \"Describe changes\" Compile and make roxygen changes make make document Commit and push any files that were changed by make document Make a pull request. It is essential that you compare your pull request to the remote release branch, NOT the develop branch. 8.2.4.2 Tags Git supports two types of tags: lightweight and annotated. For more information see the Tagging Chapter in the Git documentation. Lightweight tags are useful, but here we discuss the annotated tags that are used for marking stable versions, major releases, and versions associated with published results. The basic command is git tag. The -a flag means ‘annotated’ and -m is used before a message. Here is an example: git tag -a v0.6 -m \"stable version with foo and bar features, used in the foobar publication by Bob\" Adding a tag to the a remote repository must be done explicitly with a push, e.g. git push v0.6 To use a tagged version, just checkout: git checkout v0.6 To tag an earlier commit, just append the commit SHA to the command, e.g. git tag -a v0.99 -m \"last version before 1.0\" 9fceb02 8.2.5 References 8.2.5.1 Git Documentation Scott Chacon, ‘Pro Git book’, http://git-scm.com/book GitHub help pages, https://help.github.com/ Main GIT page http://git-scm.com/documentation Another set of pages about branching, http://sandofsky.com/blog/git-workflow.html Stackoverflow highest voted questions tagged “git” 8.2.5.2 GitHub Documentation When in doubt, the first step is to click the “Help” button at the top of the page. GitHub Flow by Scott Chacon (Git evangelist and Ruby developer working on GitHub.com) GitHub FAQ Using Pull Requests SSH Keys 8.2.6 GitHub use with PEcAn In this section, development topics are introduced and discussed. PEcAn code lives within the If you are looking for an issue to work on, take a look through issues labled “good first issue”. To get started you will want to review We use GitHub to track development. To learn about GitHub, it is worth taking some time to read through the FAQ. When in doubt, the first step is to click the “Help” button at the top of the page. To address specific people, use a github feature called @mentions e.g. write @dlebauer, @robkooper, @mdietze, or @serbinsh … in the issue to alert the user as described in the GitHub documentation on notifications 8.2.6.1 Bugs, Issues, Features, etc. 8.2.6.2 Reporting a bug (For developers) work through debugging. Once you have identified a problem, that you can not resolve, you can write a bug report Write a bug report submit the bug report If you do find the answer, explain the resolution (in the issue) and close the issue 8.2.6.3 Required content Note: a bug is only a bug if it is reproducible clear bug reports save time Clear, specific title Description - What you did What you expected to happen What actually happened What does work, under what conditions does it fail? Reproduction steps - minimum steps required to reproduce the bug additional materials that could help identify the cause: screen shots stack traces, logs, scripts, output specific code and data / settings / configuration files required to reproduce the bug environment (operating system, browser, hardware) 8.2.6.4 Requesting a feature (from The Pragmatic Programmer, available as ebook through UI libraries, hardcopy on David’s bookshelf) focus on “user stories”, e.g. specific use cases Be as specific as possible, Here is an example: Bob is at www.mysite.edu/maps map of the the region (based on user location, e.g. US, Asia, etc) option to “use current location” is provided, if clicked, map zooms in to, e.g. state or county level for site run: option to select existing site or specify point by lat/lon option to specify a bounding box and grid resolution in either lat/lon or polar stereographic. asked to specify start and end times in terms of year, month, day, hour, minute. Time is recorded in UTC not local time, this should be indicated. 8.2.6.5 Closing an issue Definition of “Done” test documentation when issue is resolved: status is changed to “resolved” assignee is changed to original author if original author agrees that issue has been resolved original author changes status to “closed” except for trivial issues, issues are only closed by the author 8.2.6.6 When to submit an issue? Ideally, non-trivial code changes will be linked to an issue and a commit. This requires creating issues for each task, making small commits, and referencing the issue within your commit message. Issues can be created on GitHub. These issues can be linked to commits by adding text such as fixes gh-5). Rationale: This workflow is a small upfront investment that reduces error and time spent re-creating and debugging errors. Associating issues and commits, makes it easier to identify why a change was made, and potential bugs that could arise when the code is changed. In addition, knowing which issue you are working on clarifies the scope and objectives of your current task. "],["coding-practices.html", "8.3 Coding Practices", " 8.3 Coding Practices 8.3.1 Coding Style Consistent coding style improves readability and reduces errors in shared code. Unless otherwise noted, PEcAn follows the Tidyverse style guide, so please familiarize yourself with it before contributing. In addition, note the following: Document all functions using roxygen2. See Roxygen2 for more details. Put your name on things. Any function that you create or make a meaningful contribution to should have your name listed after the author tag in the function documentation. It is also often a good idea to add your name to extended comments describing particularly complex or strange code. Write unit tests with testthat. Tests are a complement to documentation - they define what a function is (and is not) expected to do. Not all functions necessarily need unit tests, but the more tests we have, the more confident we can be that changes don’t break existing code. Whenever you discover and fix a bug, it is a good idea to write a unit test that makes sure the same bug won’t happen again. See Unit_Testing for instructions, and Advanced R: Tests. Do not use abbreviations. Always write out TRUE and FALSE (i.e. do not use T or F). Do not rely on partial argument matching – write out all arguments in full. Avoid dots in function names. R’s S3 methods system uses dots to denote object methods (e.g. print.matrix is the print method for objects of class matrix), which can cause confusion. Use underscores instead (e.g. do_analysis instead of do.analysis). (NOTE that many old PEcAn functions violate this convention. The plan is to deprecate those in PEcAn 2.0. See GitHub issue #392). Use informative file names with consistent extensions. Standard file extensions are .R for R scripts, .rds for individual objects (via saveRDS function), and .RData (note: capital D!) for multiple objects (via the save function). For function source code, prefer multiple files with fewer functions in each to large files with lots of files (though it may be a good idea to group closely related functions in a single file). File names should match, or at least closely reflect, their files (e.g. function do_analysis should be defined in a file called do_analysis.R). Do not use spaces in file names – use dashes (-) or underscores (_). For using external packages, add the package to Imports: and call the corresponding function with package::function. Do not use @importFrom package function or, worse yet, @import package. (The exception is infix operators like magrittr::%>% or ggplot2::%+%, which can be imported via roxygen2 documentation like @importFrom magrittr %>%). Do not add packages to Depends. In general, try to avoid adding new dependencies (especially ones that depend on system libraries) unless they are necessary or already widely used in PEcAn (e.g. GDAL, NetCDF, XML, JAGS, dplyr). For a more thorough and nuanced discussion, see the package dependencies appendix. "],["dplyr.html", "9 dplyr", " 9 dplyr When running devtools::check() a common warning relating to dplyr code is “no visible binding for global variable ‘x’” In data-masking situations (e.g. mutate(), filter()), you can eliminate this warning by using the .data pronoun. For example, instead of df %>% mutate(newvar = oldvar + 2), use df %>% mutate(newvar = .data$oldvar + 2). In tidy-select situations (e.g. select(), rename()), you can eliminate this warning by using strings instead of naked column names. For example, instead of df %>% select(y) use df %>% select(\"y\"). Using .data inside of select() is deprecated as of tidyselect v1.2.0 9.0.1 Logging During development we often add many print statements to check to see how the code is doing, what is happening, what intermediate results there are etc. When done with the development it would be nice to turn this additional code off, but have the ability to quickly turn it back on if we discover a problem. This is where logging comes into play. Logging allows us to use “rules” to say what information should be shown. For example when I am working on the code to create graphs, I do not have to see any debugging information about the SQL command being sent, however trying to figure out what goes wrong during a SQL statement it would be nice to show the SQL statements without adding any additional code. PEcAn provides a set of logger.* functions that should be used in place of base R’s stop, warn, print, and similar functions. The logger functions make it easier to print to a system log file, and to control the level of output produced by PEcAn. The file test.logger.R provides descriptive examples This query provides an current overview of functions that use logging Logger functions and their corresponding levels (in order of increasing level): logger.debug (\"DEBUG\") – Low-level diagnostic messages that are hidden by default. Good examples of this are expanded file paths and raw results from database queries or other analyses. logger.info (\"INFO\") – Informational messages that regular users may want to see, but which do not indicate anything unexpected. Good examples of this are progress updates updates for long-running processes, or brief summaries of queries or analyses. logger.warn (\"WARN\") – Warning messages about issues that may lead to unexpected but valid results. Good examples of this are interactions between arguments that lead to some arguments being ignored or removal of missing or extreme values. logger.error (\"ERROR\") – Error messages from which PEcAn has some capacity to recover. Unless you have a very good reason, we recommend avoiding this in favor of either logger.severe to actually stop execution or logger.warn to more explicitly indicate that the problem is not fatal. logger.severe – Catastrophic errors that warrant immediate termination of the workflow. This is the only function that actually stops R’s execution (via stop). The logger.setLevel function sets the level at which a message will be printed. For instance, logger.setLevel(\"WARN\") will suppress logger.info and logger.debug messages, but will print logger.warn and logger.error messages. logger.setLevel(\"OFF\") suppresses all logger messages. To print all messages to console, use logger.setUseConsole(TRUE) 9.0.2 Package Data 9.0.2.1 Summary: Files with the following extensions will be read by R as data: plain R code in .R and .r files are sourced using source() text tables in .tab, .txt, .csv files are read using read() ** objects in R image files: .RData, .rda are loaded using load() capitalization matters all objects in foo.RData are loaded into environment pro: easiset way to store objects in R format con: format is application (R) specific Details are in ?data, which is mostly a copy of Data section of Writing R Extensions. 9.0.2.2 Accessing data Data in the [data] directory will be accessed in the following ways, efficient way: (especially for large data sets) using the data function: data(foo) # accesses data with, e.g. load(foo.RData), read(foo.csv), or source(foo.R) easy way: by adding the following line to the package DESCRIPTION: note: this should be used with caution or it can cause difficulty as discussed in redmine issue #1118 LazyData: TRUE From the R help page: Currently, a limited number of data formats can be accessed using the data function by placing one of the following filetypes in a packages’ data directory: * files ending .R or .r are source()d in, with the R working directory changed temporarily to the directory containing the respective file. (data ensures that the utils package is attached, in case it had been run via utils::data.) * files ending .RData or .rda are load()ed. * files ending .tab, .txt or .TXT are read using read.table(..., header = TRUE), and hence result in a data frame. * files ending .csv or .CSV are read using read.table(..., header = TRUE, sep = ';'), and also result in a data frame. If your data does not fall in those 4 categories, or you can use the system.file function to get access to the data: system.file("data", "ed.trait.dictionary.csv", package="PEcAn.utils") [1] "/home/kooper/R/x86_64-pc-linux-gnu-library/2.15/PEcAn.utils/data/ed.trait.dictionary.csv" The arguments are folder, filename(s) and then package. It will return the fully qualified path name to a file in a package, in this case it points to the trait data. This is almost the same as the data function, however we can now use any function to read the file, such as read.csv instead of read.csv2 which seems to be the default of data. This also allows us to store arbitrary files in the data folder, such as the the bug file and load it when we need it. 9.0.2.2.1 Examples of data in PEcAn packages outputs: [/modules/uncertainties/data/output.RData] parameter samples [/modules/uncertainties/data/samples.RData] 9.0.3 Documenting functions using roxygen2 This is the standard method for documenting R functions in PEcAn. For detailed instructions, see one of the following resources: roxygen2 pacakge documentation Formatting overview Markdown formatting Namespaces (e.g. when to use @export) From “R packages” by Hadley Wickham: Object Documentation Package Metadata Below is a complete template for a Roxygen documentation block. Note that roxygen lines start with #': #' Function title, in a few words #' #' Function description, in 2-3 sentences. #' #' (Optional) Package details. #' #' @param argument_1 A description of the argument #' @param argument_2 Another argument to the function #' @return A description of what the function returns. #' #' @author Your name <your_email@email.com> #' @examples #' \\dontrun{ #' # This example will NOT be run by R CMD check. #' # Useful for long-running functions, or functions that #' # depend on files or values that may not be accessible to R CMD check. #' my_function("~/user/my_file") #'} # # This example WILL be run by R CMD check #' my_function(1:10, argument_2 = 5) ## ^^ A few examples of the function's usage #' @export # ^^ Whether or not the function will be "exported" (made available) to the user. # If omitted, the function can only be used inside the package. my_function <- function(argument_1, argument_2) {...} Here is a complete example from the PEcAn.utils::days_in_year() function: #' Number of days in a year #' #' Calculate number of days in a year based on whether it is a leap year or not. #' #' @param year Numeric year (can be a vector) #' @param leap_year Default = TRUE. If set to FALSE will always return 365 #' #' @author Alexey Shiklomanov #' @return integer vector, all either 365 or 366 #' @export #' @examples #' days_in_year(2010) # Not a leap year -- returns 365 #' days_in_year(2012) # Leap year -- returns 366 #' days_in_year(2000:2008) # Function is vectorized over years days_in_year <- function(year, leap_year = TRUE) {...} To update documentation throughout PEcAn, run make document in the PEcAn root directory. Make sure you do this before opening a pull request – PEcAn’s automated testing (Travis) will check if any documentation is out of date and will throw an error like the following if it is: These files were changed by the build process: {...} "],["developer-testing.html", "9.1 Testing", " 9.1 Testing PEcAn uses two different kinds of testing – unit tests and integration tests. 9.1.1 Unit testing Unit tests are short (<1 minute runtime) tests of functionality of specific functions. Ideally, every function should have at least one unit test associated with it. A unit test should be written for each of the following situations: Each bug should get a regression test. The first step in handling a bug is to write code that reproduces the error This code becomes the test most important when error could re-appear essential when error silently produces invalid results Every time a (non-trivial) function is created or edited Write tests that indicate how the function should perform example: expect_equal(sum(1,1), 2) indicates that the sum function should take the sum of its arguments Write tests for cases under which the function should throw an error example: expect_error(sum(\"foo\")) better : expect_error(sum(\"foo\"), \"invalid 'type' (character)\") Any functionality that you would like to protect over the long term. Functionality that is not tested is more likely to be lost. PEcAn uses the testthat package for unit testing. A general overview of is provided in the “Testing” chapter of Hadley Wickham’s book “R packages”. Another useful resource is the testthat package documentation website. See also our testthat appendix. Below is a lightning introduction to unit testing with testthat. Each package’s unit tests live in .R scripts in the folder <package>/tests/testthat. In addition, a testthat-enabled package has a file called <packagename>/tests/testthat.R with the following contents: library(testthat) library(<packagename>) test_check("<packagename>") Tests should be placed in <packagename>/tests/testthat/test-<sourcefilename>.R, and look like the following: context("Mathematical operators") test_that("mathematical operators plus and minus work as expected",{ sum1 <- sum(1, 1) expect_equal(sum1, 2) sum2 <- sum(-1, -1) expect_equal(sum2, -2) expect_equal(sum(1,NA), NA) expect_error(sum("cat")) set.seed(0) expect_equal(sum(matrix(1:100)), sum(data.frame(1:100))) }) test_that("different testing functions work, giving excuse to demonstrate",{ expect_identical(1, 1) expect_identical(numeric(1), integer(1)) expect_equivalent(numeric(1), integer(1)) expect_warning(mean('1')) expect_that(mean('1'), gives_warning("argument is not numeric or logical: returning NA")) expect_warning(mean('1'), "argument is not numeric or logical: returning NA") expect_message(message("a"), "a") }) 9.1.2 Integration testing Integration tests consist of running the PEcAn workflow in full. One way to do integration tests is to manually run workflows for a given version of PEcAn, either through the web interface or by manually creating a pecan.xml file. Such manual tests are an important part of checking PEcAn functionality. Alternatively, the base/workflow/inst/batch_run.R script can be used to quickly run a series of user-specified integration tests without having to create a bunch of XML files. This script is powered by the PEcAn.workflow::create_execute_test_xml() function, which takes as input information about the model, meteorology driver, site ID, run dates, and others, uses these to construct a PEcAn XML file, and then uses the system() command to run a workflow with that XML. If run without arguments, batch_run.R will try to run the model configurations specified in the base/workflow/inst/default_tests.csv file. This file contains a CSV table with the following columns: model – The name of the model (models.model_name column in BETY) revision – The version of the model (models.revision column in BETY) met – The name of the meteorology driver source site_id – The numeric site ID for the model run (sites.site_id) pft – The name of the plant functional type to run. If NA, the script will use the first PFT associated with the model. start_date, end_date – The start and end dates for the model run, respectively. These should be formatted according to ISO standard (YYYY-MM-DD, e.g. 2010-03-16) sensitivity – Whether or not to run the sensitivity analysis. TRUE means run it, FALSE means do not. ensemble_size – The number of ensemble members to run. Set this to 1 to do a single run at the trait median. comment – An string providing some user-friendly information about the run. The batch_run.R script will run a workflow for every row in the input table, sequentially (for now; eventually, it will try to run them in parallel), and at the end of each workflow, will perform some basic checks, including whether or not the workflow finished and if the model produced any output. These results are summarized in a CSV table (by default, a file called test_result_table.csv), with all of the columns as the input test CSV plus the following: outdir – Absolute path to the workflow directory. workflow_complete – Whether or not the PEcAn workflow completed. Note that this is a relatively low bar – PEcAn workflows can complete without having run the model or finished some other steps. has_jobsh – Whether or not PEcAn was able to write the model’s job.sh script. This is a good indication of whether or not the model’s write.configs step was successful, and may be useful for separating model configuration errors from model execution errors. model_output_raw – Whether or not the model produced any output files at all. This is just a check to see of the <workflow>/out directory is empty or not. Note that some models may produce logfiles or similar artifacts as soon as they are executed, whether or not they ran even a single timestep, so this is not an indication of model success. model_output_processed – Whether or not PEcAn was able to post-process any model output. This test just sees if there are any files of the form YYYY.nc (e.g. 1992.nc) in the <workflow>/out directory. Right now, these checks are not particularly robust or comprehensive, but they should be sufficient for catching common errors. Development of more, better tests is ongoing. The batch_run.R script can take the following command-line arguments: --help – Prints a help message about the script’s arguments --dbfiles=<path> – The path to the PEcAn dbfiles folder. The default value is ~/output/dbfiles, based on the file structure of the PEcAn VM. Note that for this and all other paths, if a relative path is given, it is assumed to be relative to the current working directory, i.e. the directory from which the script was called. --table=<path> – Path to an alternate test table. The default is the base/workflow/inst/default_tests.csv file. See preceding paragraph for a description of the format. --userid=<id> – The numeric user ID for registering the workflow. The default value is 99000000002, corresponding to the guest user on the PEcAn VM. --outdir=<path> – Path to a directory (which will be created if it doesn’t exist) for storing the PEcAn workflow outputs. Default is batch_test_output (in the current working directory). --pecandir=<path> – Path to the PEcAn source code root directory. Default is the current working directory. --outfile=<path> – Full path (including file name) of the CSV file summarizing the results of the runs. Default is test_result_table.csv. The format of the output 9.1.3 Continuous Integration Every time anyone commits a change to the PEcAn code, the act of pushing to GitHub triggers an automated build and test of the full PEcAn codebase, and all pull requests must report a successful CI build before they will be merged. This will sometimes feel like a burden when the build breaks on an issue that looks trivial, but anything that breaks the build is important enough to fix. It’s much better to find errors early and fix them before they get incorporated into the released PEcAn code. At this writing PEcAn’s CI builds primarily use GitHub Actions and the rest of this section assumes a GitHub Actions. All our GitHub Actions builds run in a containers using different versions of R in parallel. The build will use the latest pecan/depends container for that specific R version. Each night this depends image is rebuild. Each build starts by launching a separate clean virtual machine for each R version and performs roughly the following actions on all of them: * Compile the source code in the container - Installs all the R packages that are declared as dependencies in any PEcAn package, as computed by scripts/generate_dependencies.R. - This will also check to see if any files have been modified during this step * Run the tests inside the container, and checks to see if they all pass - This will also check to see if any files have been modified during this step * Run the doxygen command inside the container - This will also check to see if any files have been modified during this step * Run the check command inside the container, and checks if there are any new warnings and/or errors - Runs package unit tests (the same ones you run locally with make test or devtools::test(pkgname)). - As discussed in Unit testing, these tests should run quickly and test individual components in relative isolation. - Any test that calls the skip_on_ci function will be skipped. This is useful for tests that need to run for a very long time (e.g. large data product downloads) or require resources that aren’t available on Travis (e.g. specific models), but be sure to run these tests locally before pushing your code! - This will also check to see if any files have been modified during this step - Any ERROR in the check output will stop the build immediately. - If there are no ERRORs, any WARNINGs or NOTEs are compared against a stored historic check result in <package>/tests/Rcheck_reference.log. If the package has no stored reference result, all WARNINGs and NOTEs are considered newly added and reported as build failures. - If all messages from the current built were also present in the reference result, the check passes. If any messages are newly added, a build failure is reported. - Each line of the check log is considered a separate message, and the test requires exact matching, so a change from Undocumented arguments in documentation object 'foo': 'x' to Undocumented arguments in documentation object 'foo': 'x', 'y' will be counted as a new warning… and you should fix both of them while you’re at it! - The idea here is to enforce good coding practice and catch likely errors in all new code while recognizing that we have a lot of legacy code whose warnings need to be fixed as we have time rather than all at once. - As we fix historic warnings, we will revoke their grandfathered status by removing them from the stored check results, so that they will break the build if they reappear. - If your PR reports a failure in pre-existing code that you think ought to be grandfathered, please fix it as part of your PR anyway. It’s frustrating to see tests complain about code you didn’t touch, but the failures all need to be cleaned up eventually and it’s likely easier to fix the error than to figure out how to re-ignore it. * Run a simple integration test using SIPNET model * Create the docker images - Once your PR is merged, it will push them to DockerHub and github container repository. * Compiles the PEcAn documentation book (book_source) and the tutorials (documentation/tutorials) and uploads them to the PEcAn website. - This is only done for commits to the master or develop branch, so changes to in-progress pull requests never change the live documentation until after they are merged. If your build fails and indicates that files have been modified there are a few common causes. It should also list the files that have changes, and what has changed. * The most common cause is that you forgot to Roxygenize before committing. * This step will also detect newly added files, e.g. tests improperly writing to the current working directory rather than tempdir() and then failing to clean up after themselves. If any of the actionsreports an error, the build is marked as “failed”. If they all pass, the GitHub actions marks the build as successful and tells the PR that it’s OK to allow your changes to be merged… but the final decision belongs to the human reviewing your code and they might still ask you for other changes! "],["download-and-compile-pecan.html", "9.2 Download and Compile PEcAn", " 9.2 Download and Compile PEcAn Set R_LIBS_USER CRAN Reference # point R to personal lib folder echo 'export R_LIBS_USER=${HOME}/R/library' >> ~/.profile source ~/.profile mkdir -p ${R_LIBS_USER} 9.2.1 Download, compile and install PEcAn from GitHub # download pecan cd git clone https://github.com/PecanProject/pecan.git # compile pecan cd pecan make For more information on the capabilities of the PEcAn Makefile, check out our section on Updating PEcAn. Following will run a small script to setup some hooks to prevent people from using the pecan demo user account to check in any code. # prevent pecan user from checking in code ./scripts/create-hooks.sh 9.2.2 PEcAn Testrun Do the run, this assumes you have installed the BETY database, sites tar file and SIPNET. # create folder cd mkdir testrun.pecan cd testrun.pecan # copy example of pecan workflow and configuration file cp ../pecan/tests/pecan32.sipnet.xml pecan.xml cp ../pecan/scripts/workflow.R workflow.R # exectute workflow rm -rf pecan ./workflow.R pecan.xml NB: pecan.xml is configured for the virtual machine, you will need to change the field from ‘/home/carya/’ to wherever you installed your ‘sites’, usually $HOME "],["directory-structure.html", "9.3 Directory structure", " 9.3 Directory structure 9.3.1 Overview of PEcAn repository as of PEcAn 1.5.3 pecan/ +- base/ # Core functions +- all # Dummy package to load all PEcAn R packages +- db # Modules for querying the database +- logger # Report warnings without killing workflows +- qaqc # Model skill testing and integration testing +- remote # Communicate with and execute models on local and remote hosts +- settings # Functions to read and manipulate PEcAn settings files +- utils # Misc. utility functions +- visualization # Advanced PEcAn visualization module +- workflow # functions to coordinate analysis steps +- book_source/ # Main documentation and developer's guide +- CHANGELOG.md # Running log of changes in each version of PEcAn +- docker/ # Experimental replacement for PEcAn virtual machine +- documentation # index_vm.html, references, other misc. +- models/ # Wrappers to run models within PEcAn +- ed/ # Wrapper scripts for running ED within PEcAn +- sipnet/ # Wrapper scripts for running SIPNET within PEcAn +- ... # Wrapper scripts for running [...] within PEcAn +- template/ # Sample wrappers to copy and modify when adding a new model +- modules # Core modules +- allometry +- data.atmosphere +- data.hydrology +- data.land +- meta.analysis +- priors +- rtm +- uncertainty +- ... +- scripts # R and Shell scripts for use with PEcAn +- shiny/ # Interactive visualization of model results +- tests/ # Settings files for host-specific integration tests +- web # Main PEcAn website files 9.3.2 Generic R package structure: see the R development wiki for more information on writing code and adding data. +- DESCRIPTION # short description of the PEcAn library +- R/ # location of R source code +- man/ # Documentation (automatically compiled by Roxygen) +- inst/ # files to be installed with package that aren't R functions +- extdata/ # misc. data files (in misc. formats) +- data/ # data used in testing and examples (saved as *.RData or *.rda files) +- NAMESPACE # declaration of package imports and exports (automatically compiled by Roxygen) +- tests/ # PEcAn testing scripts +- testthat/ # nearly all tests should use the testthat framework and live here "],["working-with-vm.html", "10 VM configuration and maintenance ", " 10 VM configuration and maintenance "],["maintain-vm.html", "10.1 Updating the VM", " 10.1 Updating the VM The PEcAn VM is distributed with specific versions of PEcAn compiled. However, you do not need to constantly download the VM in order to update your code and version of BETY. To update and maintain your code you can follow the steps found in the Developer section in Updating PecAn and Code and BETY Database. However, if you are using the latest version of the PEcAn VM (>=1.7) you have the Dockerized version of PEcAn and need to follow the instructions under the DOCKER section to update your PEcAn and BETY containers. "],["ssh-vm.html", "10.2 Connecting to the VM via SSH", " 10.2 Connecting to the VM via SSH Once the VM is running anywhere on your machine, you can connect to it from a separate terminal via SSH as follows: ssh -p 6422 carya@localhost You will be prompted for a password. Like everywhere else in PEcAn, the username is carya and the password is illinois. The same password is used for any system maintenance you wish to do on the VM via sudo. As a shortcut, you can add the following to your ~/.ssh/config file (or create one if it does not exist). Host pecan-vm Hostname localhost Port 6422 user carya ForwardX11Trusted yes This will allow you to SSH into the VM with the simplified command, ssh pecan-vm. "],["ssh-vm-bety.html", "10.3 Connecting to BETYdb on the VM via SSH", " 10.3 Connecting to BETYdb on the VM via SSH Sometimes, you may want to develop code locally but connect to an instance of BETYdb on the VM. To do this, first open a new terminal and connect to the VM while enabling port forwarding (with the -L flag) and setting the port number. Using 5433 does not conflict with the postgres default port of 5432, the forwarded port will not conflict with a postgres database server running locally. ssh -L 5433:localhost:5432 carya@localhost:6422 This makes port 5433 on the local machine match port 5432 on the VM. This means that connecting to localhost:5433 will give you access to BETYdb on the VM. To test this on the command line, try the following command, which, if successful, will drop you into the psql console. psql -d bety -U bety -h localhost -p 5433 To test this in R, open a Postgres using the analogous parameters: library(DBI) library(RPostgres) con <- dbConnect( drv = Postgres(), user = "bety", password = "bety", dbname = "bety", host = "localhost", port = 5433 ) dbListTables(con) # This should return a vector of bety tables Note that the same general approach will work on any BETYdb server where port forwarding is enabled, but it requires ssh access. 10.3.1 Using Amazon Web Services for a VM (AWS) Login to Amazon Web Services (AWS) and select the EC2 Dashboard. If this is your first time using AWS you will need to set up an account before you are able to access the EC2 Dashboard. Important: You will need a credit card number and access to a phone to be able to verify AWS account registration. AWS is free for one year. Choose AMI On the top right next to your name, make sure the location setting is on U.S. East (N. Virginia), not U.S. West (Oregon) On the left click, click on EC2 (Virtual servers), then click on “AMIs”, also on the left In the search window toggle to change “Owned by me” to “Public images” Type “pecan” into the search window Click on the toggle button on the left next to PEcAn1.4.6 Click on the “Launch” button at the top Choose an Instance Type Select what type of machine you want to run. For this demo the default, t2.micro, will be adequate. Be aware that different machine types incur very different costs, from 1.3 cents/hour to over $5/hr https://aws.amazon.com/ec2/pricing/ Select t2.micro, then click “Next: Configure Instance Details” Configure Instance Details The defaults are OK. Click “Next: Add Storage” Add Storage The defaults are OK. Click “Next: Tag Instance” Tag Instance You can name your instance if you want. Click “Next: Configure Security Group” Configure Security Group You will need to add two new rules: Click “Add Rule” then select “HTTP” from the pull down menu. This rule allows you to access the webserver on PEcAn. Click “Add Rule”, leave the pull down on “Custom TCP Rule”, and then change the Port Range from 0 to 8787. Set “Source” to Anywhere. This rule allows you to access RStudio Server on PEcAn. Click “Review and Launch” . You will then see this pop-up: Select the default drive volume type and click Next Review and Launch Review the settings and then click “Launch”, which will pop up a select/create Key Pair window. Key Pair Select “Create a new key pair” and give it a name. You won’t actually need this key unless you need to SSH into your PEcAn server, but AWS requires you to create one. Click on “Download Key Pair” then on “Launch Instances”. Next click on “View Instances” at the bottom of the following page. Instances You will see the status of your PEcAn VM, which will take a minute to boot up. Wait until the Instance State reads “running”. The most important piece of information here is the Public IP, which is the URL you will need in order to access your PEcAn instance from within your web browser (see Demo 1 below). Be aware that it often takes ~1 hr for AWS instances to become fully operational, so if you get an error when you put the Public IP in you web browser, most of the time you just need to wait a bit longer. Congratulations! You just started a PEcAn server in the “cloud”! When you are done using PEcAn, you will want to return to the “Instances” menu to turn off your VM. To STOP the instance (which will turn the machine off but keep your work), select your PEcAn instance and click Actions > Instance state > Stop. Be aware that a stopped instance will still accrue a small storage cost on AWS. To restart this instance at any point in the future you do not want to repeat all the steps above, but instead you just need to select your instance and then click Actions > Instance state > Start To TERMINATE the instance (which will DELETE your PEcAn machine), select your instance and click Actions > Instance state > Terminate. Terminated instances will not incur costs. In most cases you will also want to go to the Volumes menu and delete the storage associated with your PEcAn VM.Remember, AWS is free for one year, but will automatically charge a fee in second year if account is not cancelled. 10.3.2 Creating a Virtual Machine First create virtual machine # ---------------------------------------------------------------------- # CREATE VM USING FOLLOWING: # - VM NAME = PEcAn # - CPU = 2 # - MEMORY = 2GB # - DISK = 100GB # - HOSTNAME = pecan # - FULLNAME = PEcAn Demo User # - USERNAME = xxxxxxx # - PASSWORD = yyyyyyy # - PACKAGE = openssh # ---------------------------------------------------------------------- To enable tunnels run the following on the host machine: VBoxManage modifyvm "PEcAn" --natpf1 "ssh,tcp,,6422,,22" VBoxManage modifyvm "PEcAn" --natpf1 "www,tcp,,6480,,80" Make sure machine is up to date. UBUNTU sudo apt-get update sudo apt-get -y dist-upgrade sudo reboot CENTOS/REDHAT sudo yum -y update sudo reboot Install compiler and other packages needed and install the tools. UBUNTU sudo apt-get -y install build-essential linux-headers-server dkms CENTOS/REDHAT sudo yum -y groupinstall "Development Tools" sudo yum -y install wget Install Virtual Box additions for better integration sudo mount /dev/cdrom /mnt sudo /mnt/VBoxLinuxAdditions.run sudo umount /mnt sudo usermod -a -G vboxsf carya Finishing up the machine Add a message to the login: sudo -s export PORT=$( hostname | sed 's/pecan//' ) cat > /etc/motd << EOF PEcAn version 1.4.3 For more information about: Pecan - http://pecanproject.org BETY - http://www.betydb.org For a list of all models currently navigate [here](../users_guide/basic_users_guide/models_table.md) You can access this system using a webbrowser at http://<hosting machine>:${PORT}80/ or using SSH at ssh -l carya -p ${PORT}22 <hosting machine> where <hosting machine> is the machine where the VM runs on. EOF exit Finishing up Script to clean the VM and remove as much as possible history cleanvm.sh wget -O ~/cleanvm.sh http://isda.ncsa.uiuc.edu/~kooper/EBI/cleanvm.sh chmod 755 ~/cleanvm.sh Make sure machine has SSH keys rc.local sudo wget -O /etc/rc.local http://isda.ncsa.illinois.edu/~kooper/EBI/rc.local Change the resolution of the console sudo sed -i -e 's/#GRUB_GFXMODE=640x480/GRUB_GFXMODE=1024x768/' /etc/default/grub sudo update-grub Once all done, stop the virtual machine history -c && ${HOME}/cleanvm.sh "],["pecan-standards.html", "11 PEcAn standard formats ", " 11 PEcAn standard formats "],["defining-new-input-formats.html", "11.1 Defining new input formats", " 11.1 Defining new input formats New formats can be defined on the ‘formats’ page of BETYdb After creating a new format, the contents should be defined by specifying the BETYdb variable name and the name used in the file/ "],["time-standard.html", "11.2 Time Standard", " 11.2 Time Standard Internal PEcAn standard time follows ISO_8601 format for dates and time (https://en.wikipedia.org/wiki/ISO_8601). For example ordinal dates go from 1 365/366 (https://en.wikipedia.org/wiki/ISO_8601#Ordinal_dates). However, time used in met drivers or model outputs follows CF convention with julian dates following the 0 to 364/365 format To aid in the conversion between PEcAn internal ISO_8601 standard and CF convention used in all met drivers and PEcAn standard output you can utilize the functions: “cf2datetime”,“datetime2doy”,“cf2doy”, and for SIPNET “sipnet2datetime” 11.2.1 Input Standards 11.2.1.1 Meterology Standards 11.2.1.1.1 Dimensions: CF standard-name units time days since 1700-01-01 00:00:00 UTC longitude degrees_east latitude degrees_north General Note: dates in the database should be date-time (preferably with timezone), and datetime passed around in PEcAn should be of type POSIXct. 11.2.1.1.2 The variable names should be standard_name preferred variables indicated in bold wind_direction has no CF equivalent and should not be converted, instead the met2CF functions should convert wind_direction and wind_speed to eastward_wind and northward_wind standard_name is CF-convention standard names units can be converted by udunits, so these can vary (e.g. the time denominator may change with time frequency of inputs) soil moisture for the full column, rather than a layer, is soil_moisture_content A full list of PEcAn standard variable names, units and dimensions can be found here: https://github.com/PecanProject/pecan/blob/develop/base/utils/data/standard_vars.csv For example, in the MsTMIP-CRUNCEP data, the variable rain should be precipitation_rate. We want to standardize the units as well as part of the met2CF.<product> step. I believe we want to use the CF “canonical” units but retain the MsTMIP units any time CF is ambiguous about the units. The key is to process each type of met data (site, reanalysis, forecast, climate scenario, etc) to the exact same standard. This way every operation after that (extract, gap fill, downscale, convert to a model, etc) will always have the exact same inputs. This will make everything else much simpler to code and allow us to avoid a lot of unnecessary data checking, tests, etc being repeated in every downstream function. 11.2.2 Soils and Vegetation Inputs 11.2.2.0.1 Soil Data Check out the Soil Data section on more into on creating a standard soil data file. 11.2.2.0.2 Vegetation Data Check Out the Vegetation Data section on more info on creating a standard vegetation data file 11.2.3 Output Standards created by model2netcdf functions based on format used by MsTMIP Can be seen at HERE We originally used the MsTMIP conventions. Since then, we’ve added the PaLEON variable conventions to our standard as well. If a variable isn’t in one of those two, we stick to the CF conventions. "],["pecanXML.html", "12 The PEcAn XML", " 12 The PEcAn XML The PEcAn system is configured using a XML file, often called pecan.xml. It contains the following major sections (“nodes”): Core configuration Top level structure info – Run metadata outdir – Output directory database – PEcAn database settings pft – Plant functional type selection meta.analysis – Trait meta analysis model – Model configuration run – Run setup host – Host information for remote execution Advanced features ensemble – Ensemble runs sensitivity.analysis – Sensitivity analysis parameter.data.assimilation – Parameter data assimilation multi.settings – Multi Site Settings (experimental) state.data.assimilation – State data assimilation (experimental) browndog – Brown Dog configuration (experimental) benchmarking – Benchmarking remote_process – Remote data module A basic example looks like this: <?xml version="1.0" encoding="UTF-8"?> <pecan> <info> <notes>Example run</notes> <userid>-1</userid> <username>guestuser</username> <date>2018/09/18 19:12:28 +0000</date> </info> <outdir>/data/workflows/PEcAn_99000000006</outdir> <database> <bety> <user>bety</user> <password>bety</password> <host>postgres</host> <dbname>bety</dbname> <driver>PostgreSQL</driver> <write>true</write> </bety> <dbfiles>/data/dbfiles</dbfiles> </database> <pfts> <pft> <name>tundra.grasses</name> <constants> <num>1</num> </constants> </pft> </pfts> <meta.analysis> <iter>3000</iter> <random.effects> <on>FALSE</on> <use_ghs>TRUE</use_ghs> </random.effects> </meta.analysis> <ensemble> <size>1</size> <variable>NPP</variable> <samplingspace> <parameters> <method>uniform</method> </parameters> <met> <method>sampling</method> </met> </samplingspace> </ensemble> <model> <id>5000000002</id> </model> <workflow> <id>99000000006</id> </workflow> <run> <site> <id>1000000098</id> <met.start>2004/01/01</met.start> <met.end>2004/12/31</met.end> </site> <inputs> <met> <source>CRUNCEP</source> <output>SIPNET</output> </met> </inputs> <start.date>2004/01/01</start.date> <end.date>2004/12/31</end.date> </run> <host> <name>localhost</name> <rabbitmq> <uri>amqp://guest:guest@rabbitmq:5672/%2F</uri> <queue>SIPNET_136</queue> </rabbitmq> </host> </pecan> In the following sections, we step through each of these sections in detail. "],["xml-core-config.html", "12.1 Core configuration", " 12.1 Core configuration 12.1.1 Top-level structure The first line of the XML file should contain version and encoding information. <?xml version="1.0" encoding="UTF-8"?> The rest of the XML file should be surrounded by <pecan>...</pecan> tags. <pecan> ...XML body here... </pecan> 12.1.2 info: Run metadata This section contains run metadata. This information is not essential to a successful model run, but is useful for tracking run provenance. <info> <notes>Example run</notes> <userid>-1</userid> <username>guestuser</username> <date>2018/09/18 19:12:28 +0000</date> </info> The <notes> tag will be filled in by the web GUI if you provide notes, or you can add notes yourself within these tags. We suggest adding notes that help identify your run and a brief description of what the run is for. Because these notes are searchable within the PEcAn database and web interface, they can be a useful way to distinguish between similar runs. The <userid> and <username> section is filled in from the GUI if you are signed in. If you are not using the GUI, add the user name and ID you are associated with that exists within the PEcAn database. The <date></date> tag is filled automatically at the time of your run from the GUI. If you are not using the GUI, add the date you execute the run. This tag is not the tag for the dates you would like to run your model simulation. 12.1.3 outdir: Output directory The <outdir> tag is used to configure the output folder used by PEcAn. This is the directory where all model input and output files will be stored. By default, the web interface names this folder PEcAn_<workflow ID>, and higher-level location is set by the $output_folder$ variable in the web/config.php file. If no outdir is specified, PEcAn defaults to the working directory from which it is called, which may be counterintuitive. <outdir>/data/workflows/PEcAn_99000000006</outdir> 12.1.4 database: PEcAn database settings 12.1.4.1 bety: PEcAn database (Bety) configuration The bety tag defines the driver to use to connect to the database (we support PostgreSQL, which is the default, and Postgres) and parameters required to connect to the database. Note that connection parameters are passed exactly as entered to the underlying R database driver, and any invalid or extra parameters will result in an error. In other words, this configuration… <database> ... <bety> <user>bety</user> <password>bety</password> <host>postgres</host> <dbname>bety</dbname> <driver>PostgreSQL</driver> <port>5432</port> <write>true</write> </bety> ... </database> …will be translated (by way of PEcAn.DB::db.open(settings$database$bety)) into R code like the following: con <- DBI::dbConnect( drv = RPostgreSQL::PostgreSQL(), user = "bety", password = "bety", dbname = "bety", host = "postgres", port = "5432", write = TRUE ) Common parameters are described as follows: driver: The driver to use to connect to the database. This should always be set to PostgreSQL, unless you absolutely know what you’re doing. dbname: The name of the database (formerly name), corresponding to the -d argument to psql. In most cases, this should be set to bety, and will only be different if you named your Bety instance something else (e.g. if you have multiple instances running at once). If unset, it will default to the user name of the current user, which is usually wrong! user: The username to connect to the database (formerly userid), corresponding to the -U argument to psql. default value is the username of the current user logged in (PostgreSQL uses user for this field). password: The password to connect to the database (was passwd), corresponding to the -p argument to psql. If unspecified, no password is used. On standard PEcAn installations, the username and password are both bety (all lowercase). host: The hostname of the bety database, corresponding to the -h argument to psql. On the VM, this will be localhost (the default). If using docker, this will be the name of the PostgreSQL container, which is postgres if using our standard docker-compose. If connecting to the PEcAn database on a remote server (e.g. psql-pecan.bu.edu), this should be the same as the hostname used for ssh access. write: Logical. If true (the default), write results to the database. If false, PEcAn will run but will not store any information to bety. When using the web interface, this section is configured by the web/config.php file. The default config.php settings on any given platform (VM, Docker, etc.) or in example files (e.g. config.php.example) are a good place to get default values for these fields if writing pecan.xml by hand. Key R functions using these parameters are as follows: PEcAn.DB::db.open – Open a database connection and create a connection object, which is used by many other functions for communicating with the PEcAn database. 12.1.4.2 dbfiles: Location of database files The dbfiles is a path to the local location of files needed to run models using PEcAn, including model executables and inputs. <database> ... <dbfiles>/data/dbfiles</dbfiles> ... </database> 12.1.4.3 (Experimental) fia: FIA database connection parameters If a version of the FIA database is available, it can be configured using <fia> node, whose syntax is identical to that of the <bety> node. <database> ... <fia> <dbname>fia5data</dbname> <username>bety</username> <password>bety</password> <host>localhost</host> </fia> ... </database> Currently, this is only used for extraction of specific site vegetation information (notably, for ED2 css, pss, and site files). Stability not ensured as of 1.5.3. 12.1.5 pft: Plant functional type selection The PEcAn system requires at least 1 plant functional type (PFT) to be specified inside the <pfts> section. <pfts> <pft> <name>tundra.grasses</name> <constants> <num>1</num> </constants> <posterior.files>Path to a post.distns.*.Rdata or prior.distns.Rdata</posterior.files> </pft> </pfts> name : (required) the name of the PFT, which must exactly match the name in the PEcAn database. outdir: (optional) Directory path in which PFT-specific output will be stored during meta-analysis and sensitivity analysis. If not specified (recommended), it will be written into <outdir>/<pftname>. contants: (optional) this section contains information that will be written directly into the model specific configuration files. For example, some models like ED2 use PFT numbers instead of names for PFTs, and those numbers can be specified here. See documentation for model-specific code for details. posterior.files (Optional) this tag helps to signal write.config functions to use specific posterior/prior files (such as HPDA or MA analysis) for generating samples without needing to access to the bety database. `` This information is currently used by the following PEcAn workflow function: get.traits - ?????? 12.1.6 meta.analysis: Trait Meta Analysis The section meta.analysis needs to exists for a meta.analysis to be executed, even though all tags inside are optional. Conversely, if you do not want to do a trait meta-analysis (e.g. if you want to manually set all parameters), you should omit this node. <meta.analysis> <iter>3000</iter> <random.effects> <on>FALSE</on> <use_ghs>TRUE</use_ghs> </random.effects> </meta.analysis> Some of the tags that can go in this section are: iter: MCMC (Markov Chain Monte Carlo) chain length, i.e. the total number of posterior samples in the meta-analysis, default is 3000. Smaller numbers will run faster but produce larger errors. random.effects: Settings related to whether to include random effects (site, treatment) in meta-analysis model. on: Default is set to FALSE to work around convergence problems caused by an over parameterized model (e.g. too many sites, not enough data). Can be turned to TRUE for including hierarchical random effects. use_ghs: Default is set to TRUE to include greenhouse measurements. Can be set to FALSE to exclude cases where all data is from greenhouse. update: Should previous results of meta.analysis and get.traits be re-used. If set to TRUE the meta-analysis and get.trait.data will always be executed. Setting this to FALSE will try and reuse existing results. Future versions will allow for AUTO as well which will try and reuse if the PFT/traits have not changed. The default value is FALSE. threshold: threshold for Gelman-Rubin convergence diagnostic (MGPRF); default is 1.2. This information is currently used by the following PEcAn workflow function: PEcAn.MA::run.meta.analysis - ??? 12.1.7 model: Model configuration This section describes which model PEcAn should run and some instructions for how to run it. <model> <id>7</id> <type>ED2</type> <binary>/usr/local/bin/ed2.r82</binary> <prerun>module load hdf5</prerun> <config.header> <!--...xml code passed directly to config file...--> </config.header> </model> Some important tags are as follows: id – The unique numeric ID of the model in the PEcAn database models table. If this is present, then type and binary are optional since they can be determined from the PEcAn database. type – The model “type”, matching the PEcAn database modeltypes table name column. This also refers to which PEcAn model-specific package will be used. In PEcAn, a “model” refers to a specific version (e.g. release, git commit) of a specific model, and “model type” is used to link different releases of the same model. Model “types” also have specific PFT definitions and other requirements associated with them (e.g. the ED2 model “type” requires a global land cover database). binary – The file path to the model executable. If omitted, PEcAn will use whatever path is registered in the PEcAn database for the current machine. prerun – Additional options added to the job.sh script, which is used to execute the model. This is useful for setting specific environment variables, load modules, etc. This information is currently used by the following PEcAn workflow function: PEcAn.<MODEL>::write.config.<MODEL> – Write model-specific configuration files PEcAn.workflow::start_model_runs – Begin model run execution 12.1.7.1 Model-specific configuration See the following: ED2 Configuration SIPNET Configuration BIOCRO Configuration 12.1.7.2 ED2 specific tags Following variables are ED specific and are used in the ED2 Configuration. Starting at 1.3.7 the tags for inputs have moved to <run><inputs>. This includes, veg, soil, psscss, inputs. <edin>/home/carya/runs/PEcAn_4/ED2IN.template</edin> <config.header> <radiation> <lai_min>0.01</lai_min> </radiation> <ed_misc> <output_month>12</output_month> </ed_misc> </config.header> <phenol.scheme>0</phenol.scheme> edin : [required] template used to write ED2IN file veg : OBSOLETE [required] location of VEG database, now part of <run><inputs> soil : OBSOLETE [required] location of soild database, now part of <run><inputs> psscss : OBSOLETE [required] location of site inforation, now part of <run><inputs>. Should be specified as <pss>, <css> and <site>. inputs : OBSOLETE [required] location of additional input files (e.g. data assimilation data), now part of <run><inputs>. Should be specified as <lu> and <thsums>. 12.1.8 run: Run Setup This section provides detailed configuration for the model run, including the site and time period for the simulation and what input files will be used. <run> <site> <id>1000000098</id> <met.start>2004/01/01</met.start> <met.end>2004/12/31</met.end> <site.pft> <pft.name>temperate.needleleaf.evergreen</pft.name> <pft.name>temperate.needleleaf.evergreen.test</pft.ame> </site.pft> </site> <inputs> <met> <source>CRUNCEP</source> <output>SIPNET</output> </met> <poolinitcond> <source>NEON_veg</source> <output>poolinitcond</output> <ensemble>100</ensemble> <startdate>2019-01-01</startdate> <enddate>2019-12-31</enddate> <storedir>/projectnb/dietzelab/neon_store</storedir> </poolinitcond> </inputs> <start.date>2004/01/01</start.date> <end.date>2004/12/31</end.date> <stop_on_error>TRUE</stop_on_error> </run> 12.1.8.1 site: Where to run the model This contains the following tags: id – This is the numeric ID of the site in the PEcAn database (table sites, column id). PEcAn can automatically fill in other relevant information for the site (e.g. name, lat, lon) using the site ID, so those fields are optional if ID is provided. name – The name of the site, as a string. lat, lon – The latitude and longitude coordinates of the site, as decimals. met.start, met.end – ??? <site.pft> (optional) If this tag is found under the site tag, then PEcAn automatically makes sure that only PFTs defined under this tag is used for generating parameter’s samples. Following shows an example of how this tag can be added to the PEcAn xml : <site.pft> <pft.name>temperate.needleleaf.evergreen</pft.name> <pft.name>temperate.needleleaf.evergreen</pft.name> </site.pft> For multi-site runs if the pft.site tag (see {#xml-run-inputs}) is defined under input, then the above process will be done automatically under prepare settings step in PEcAn main workflow and there is no need for adding the tags manually. Using the pft.site tag however, requires a lookup table as an input (see {#xml-run-inputs}). 12.1.8.2 inputs: Model inputs Models require several different types of inputs to run. Exact requirements differ from model to model, but common inputs include meteorological/climate drivers, site initial conditions (e.g. vegetation composition, carbon pools), and land use drivers. In general, all inputs should have the following tags: id: Numeric ID of the input in the PEcAn database (table inputs, column id). If not specified, PEcAn will try to figure this out based on the source tag (described below). path: The file path of the input. Usually, PEcAn will set this automatically based on the id (which, in turn, is determined from the source). However, this can be set manually for data that PEcAn does not know about (e.g. data that you have processed yourself and have not registered with the PEcAn database). source: The input data type. This tag name needs to match the names in the corresponding conversion functions. If you are using PEcAn’s automatic input processing, this is the only field you need to set. However, this field is ignored if id and/or path are provided. output: ??? The following are the most common types of inputs, along with their corresponding tags: 12.1.8.2.1 met: Meteorological inputs (Under construction. See the PEcAn.data.atmosphere package, located in modules/data.atmosphere, for more details.) 12.1.8.2.2 (Experimental) soil: Soil inputs (Under construction. See the PEcAn.data.land package, located in modules/data.land, for more details). 12.1.8.2.3 (Experimental) veg: Vegetation initial conditions 12.1.8.2.4 poolinitcond: initial condition inputs source: Data source of initial condition .rd veg files ex: NEON_veg, FIA. output: This tag can only must match the cooresponding tag in the modeltypes_formats table that matches with your model type ex for SIPNET model tag is poolinitcond. ensemble: Number of initial conditions ensemble member files ex: 30. (Under construction. Follow developments in the PEcAn.data.land package, located in modules/data.land in the source code). 12.1.8.2.5 pft.site Multi-site site / PFT mapping When performing multi-site runs, it is not uncommon to find that different sites need to be run with different PFTs, rather than running all PFTs at all sites. If you’re interested to use a specific PFT for your site/sites you can use the following tag to tell PEcAn which PFT needs to be used for what site. <pft.site> <path>site_pft.csv</path> </pft.site> For example using the above tag, user needs to have a csv file named site_pft stored in the pecan folder. At the moment we have functions supporting just the .csv and .txt files which are comma separated and have the following format: site_id, pft_name 1000025731,temperate.broadleaf.deciduous 764,temperate.broadleaf.deciduous Then pecan would use this lookup table to inform write.ensemble.config function about what PFTs need to be used for what sites. 12.1.8.3 start.date and end.date The start and end date for the run, in a format parseable by R (e.g. YYYY/MM/DD or YYYY-MM-DD). These dates are inclusive; in other words, they refer to the first and last days of the run, respectively. NOTE: Any time-series inputs (e.g. meteorology drivers) must contain all of these dates. PEcAn tries to detect and throw informative errors when dates are out of bounds inputs, but it may not know about some edge cases. 12.1.8.4 Other tags The following tags are optional run settings that apply to any model: jobtemplate: the template used when creating a job.sh file, which is used to launch the actual model. Each model has its own default template in the inst folder of the corresponding R package (for instance, here is the one for ED2). The following variables can be used: @SITE_LAT@, @SITE_LON@, @SITE_MET@, @START_DATE@, @END_DATE@, @OUTDIR@, @RUNDIR@ which all come variables in the pecan.xml file. The following two command can be used to copy and clean the results from a scratch folder (specified as scratch in the run section below, for example local disk vs network disk) : @SCRATCH_COPY@, @SCRATCH_CLEAR@. stop_on_error: (logical) Whether the workflow should immediately terminate if any of the model runs fail. If unset, this defaults to TRUE unless you are running an ensemble simulation (and ensemble size is greater than 1). Some models also have model-specific tags, which are described in the PEcAn Models section. 12.1.9 host: Host information for remote execution This section provides settings for remote model execution, i.e. any execution that happens on a machine (including “virtual” machines, like Docker containers) different from the one on which the main PEcAn workflow is running. A common use case for this section is to submit model runs as jobs to a high-performance computing cluster. If no host tag is provided, PEcAn assumes models are run on localhost, a.k.a. the same machine as PEcAn itself. For detailed instructions on remote execution, see the Remote Execution page. For detailed information on configuring this for RabbitMQ, see the RabbitMQ page. The following provides a quick overview of XML tags related to remote execution. NOTE: Any paths specified in the pecan.xml refer to paths on the host specified in this section, /not/ the machine on which PEcAn is running (unless models are running on localhost or this section is omitted). <host> <name>pecan2.bu.edu</name> <rundir>/fs/data3/guestuser/pecan/testworkflow/run</rundir> <outdir>/fs/data3/guestuser/pecan/testworkflow/out</outdir> <scratchdir>/tmp/carya</scratchdir> <clearscratch>TRUE</clearscratch> <qsub>qsub -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash</qsub> <qsub.jobid>Your job ([0-9]+) .*</qsub.jobid> <qstat>'qstat -j @JOBID@ &> /dev/null || echo DONE'</qstat> <prerun>module load udunits R/R-3.0.0_gnu-4.4.6</prerun> <cdosetup>module load cdo/2.0.6</cdosetup> <modellauncher> <binary>/usr/local/bin/modellauncher</binary> <qsub.extra>-pe omp 20</qsub.extra> </modellauncher> </host> The host section has the following tags: name: [optional] name of host server where model is located and executed, if not specified localhost is assumed. folder: [required] The location to store files on the remote machine. For localhost this is optional (<outdir> is the default), for any other host this is required. rundir: [optional] location where all the configuration files are written. If not specified, this will be generated starting at the path specified in folder. outdir: [optional] location where all the outputs of the model are written. If not specified, this will be generated starting at the path specified in folder. scratchdir: [optional] location where output is written. If specified the output from the model is written to this folder and copied to the outdir when the model is finished, this could significantly speed up the model execution (by using local or ram disk). clearscratch: [optional] if set to TRUE the scratchfolder is cleaned up after copying the results to the outdir, otherwise the folder will be left. The default is to clean up after copying. qsub: [optional] the command to submit a job to the queuing system. There are 3 parameters you can use when specifying the qsub command, you can add additional values for your specific setup (for example -l walltime to specify the walltime, etc). You can specify @NAME@ the pretty name, @STDOUT@ where to write stdout and @STDERR@, where to write stderr. You can specify an empty element (<qsub/>) in which case it will use the default value is qsub -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -s /bin/bash. qsub.jobid: [optional] the regular expression used to find the jobid returned from qsub. If not specified (and qsub is) it will use the default value is Your job ([0-9]+) .* qstat: [optional] the command to execute to check if a job is finished, this should return DONE if the job is finished. There is one parameter this command should take @JOBID@ which is the ID of the job as returned by qsub.jobid. If not specified (and qsub is) it will use the default value is qstat -j @JOBID@ || echo DONE prerun: [optional] additional options to add to the job.sh at the top. cdosetup: [optional] additional options to add to the job.sh at the top, specifically work for the CDO Linux command line package, allowing the user to generate the full year netCDF file through model2netcdf.SIPNET function (there has been an issue while this function will iteratively overlap the previous netCDF file, resulting in partial data loss). modellauncher: [optional] this is an experimental section that will allow you to submit all the runs as a single job to a HPC system. The modellauncher section if specified will group all runs together and only submit a single job to the HPC cluster. This single job will leverage of a MPI program that will execute a single run. Some HPC systems will place a limit on the number of jobs that can be executed in parallel, this will only submit a single job (using multiple nodes). In case there is no limit on the number of jobs, a single PEcAn run could potentially submit a lot of jobs resulting in the full cluster running jobs for a single PEcAn run, preventing others from executing on the cluster. The modellauncher has 3 arguments: * binary : [required] The full path to the binary modellauncher. Source code for this file can be found in pecan/contrib/modellauncher](https://github.com/PecanProject/pecan/tree/develop/contrib/modellauncher). * qsub.extra : [optional] Additional flags to pass to qsub besides those specified in the qsub tag in host. This option can be used to specify that the MPI environment needs to be used and the number of nodes that should be used. * mpirun: [optional] Additional commands to be added to the front of launcher.sh. Default is mpirun <path to binary> <path to joblist.txt>. Edit this to, for example, load necessary modules to run mpirun. "],["xml-advanced.html", "12.2 Advanced features", " 12.2 Advanced features 12.2.1 ensemble: Ensemble Runs As with meta.analysis, if this section is missing, then PEcAn will not do an ensemble analysis. <ensemble> <size>1</size> <variable>NPP</variable> <samplingspace> <parameters> <method>uniform</method> </parameters> <met> <method>sampling</method> </met> </samplingspace> </ensemble> An alternative configuration is as follows: <ensemble> <size>5</size> <variable>GPP</variable> <start.year>1995</start.year> <end.year>1999</end.year> <samplingspace> <parameters> <method>lhc</method> </parameters> <met> <method>sampling</method> </met> </samplingspace> </ensemble> Tags in this block can be broken down into two categories: Those used for setup (which determine how the ensemble analysis runs) and those used for post-hoc analysis and visualization (i.e. which do not affect how the ensemble is generated). Tags related to ensemble setup are: size : (required) the number of runs in the ensemble. samplingspace: (optional) Contains tags for defining how the ensembles will be generated. Each piece in the sampling space can potentially have a method tag and a parent tag. Method refers to the sampling method and parent refers to the cases where we need to link the samples of two components. When no tag is defined for one component, one sample will be generated and used for all the ensembles. This allows for partitioning/studying different sources of uncertainties. For example, if no met tag is defined then, one met path will be used for all the ensembles and as a result the output uncertainty will come from the variability in the parameters. At the moment no sampling method is implemented for soil and vegetation. Available sampling methods for parameters can be found in the documentation of the PEcAn.utils::get.ensemble.samples function. For the cases where we need simulations with a predefined set of parameters, met and initial condition we can use the restart argument. Restart needs to be a list with name tags of runid, inputs, new.params (parameters), new.state (initial condition), ensemble.id (ensemble ids), start.time, and stop.time. The restart functionality is developed using model specific functions by called write_restart.modelname. You need to make sure first that this function is already exist for your desired model. Note: if the ensemble size is set to 1, PEcAn will select the posterior median parameter values rather than taking a single random draw from the posterior Tags related to post-hoc analysis and visualization are: variable: (optional) name of one (or more) variables the analysis should be run for. If not specified, sensitivity.analysis variable is used, otherwise default is GPP (Gross Primary Productivity). (NOTE: This static visualization functionality will soon be deprecated as PEcAn moves towards interactive visualization tools based on Shiny and htmlwidgets). This information is currently used by the following PEcAn workflow functions: PEcAn.<MODEL>::write.config.<MODEL> - See above. PEcAn.uncertainty::write.ensemble.configs - Write configuration files for ensemble analysis PEcAn.uncertainty::run.ensemble.analysis - Run ensemble analysis 12.2.2 sensitivity.analysis: Sensitivity analysis Only if this section is defined a sensitivity analysis is done. This section will have <quantile> or <sigma> nodes. If neither are given, the default is to use the median +/- [1 2 3] x sigma (e.g. the 0.00135 0.0228 0.159 0.5 0.841 0.977 0.999 quantiles); If the 0.5 (median) quantile is omitted, it will be added in the code. <sensitivity.analysis> <quantiles> <sigma>-3</sigma> <sigma>-2</sigma> <sigma>-1</sigma> <sigma>1</sigma> <sigma>2</sigma> <sigma>3</sigma> </quantiles> <variable>GPP</variable> <perpft>TRUE</perpft> <start.year>2004</start.year> <end.year>2006</end.year> </sensitivity.analysis> quantiles/sigma : [optional] The number of standard deviations relative to the standard normal (i.e. “Z-score”) for which to perform the ensemble analysis. For instance, <sigma>1</sigma> corresponds to the quantile associated with 1 standard deviation greater than the mean (i.e. 0.681). Use a separate <sigma> tag, all under the <quantiles> tag, to specify multiple quantiles. Note that we do not automatically add the quantile associated with -sigma – i.e. if you want +/- 1 standard deviation, then you must include both <sigma>1</sigma> and <sigma>-1</sigma>. start.date : [required?] start date of the sensitivity analysis (in YYYY/MM/DD format) end.date : [required?] end date of the sensitivity analysis (in YYYY/MM/DD format) NOTE: start.date and end.date are distinct from values set in the run tag because this analysis can be done over a subset of the run. variable : [optional] name of one (or more) variables the analysis should be run for. If not specified, sensitivity.analysis variable is used, otherwise default is GPP. perpft : [optional] if TRUE a sensitivity analysis on PFT-specific outputs will be run. This is only possible if your model provides PFT-specific outputs for the variable requested. This tag only affects the output processing, not the number of samples proposed for the analysis nor the model execution. This information is currently used by the following PEcAn workflow functions: PEcAn.<MODEL>::write.configs.<MODEL> – See above PEcAn.uncertainty::run.sensitivity.analysis – Executes the uncertainty analysis 12.2.3 Parameter Data Assimilation The following tags can be used for parameter data assimilation. More detailed information can be found here: Parameter Data Assimilation Documentation 12.2.4 Multi-Settings Multi-settings allows you to do multiple runs across different sites. This customization can also leverage site group distinctions to expedite the customization. It takes your settings and applies the same settings, changing only the site level tags across sites. To start, add the multisettings tag within the <run></run> section of your xml <multisettings> <multisettings>run</multisettings> <multisettings> Additional tags for this section exist and can fully be seen here: <multisettings> <multisettings>assim.batch</multisettings> <multisettings>ensemble</multisettings> <multisettings>sensitivity.analysis</multisettings> <multisettings>run</multisettings> </multisettings> These tags correspond to different pecan analysis that need to know that there will be multiple settings read in. Next you’ll want to add the following tags to denote the group of sites you want to use. It leverages site groups, which are defined in BETY. <sitegroup> <id>1000000022</id> </sitegroup> If you add this tag, you must remove the <site> </site> tags from the <run> tag portion of your xml. The id of your sitegroup can be found by lookig up your site group within BETY. You do not have to use the sitegroup tag. You can manually add multiple sites using the structure in the example below. Lastly change the top level tag to <pecan.multi>, meaning the top and bootom of your xml should look like this: <?xml version="1.0"?> <pecan.multi> ... </pecan.multi> Once you have defined these tags, you can run PEcAn, but there may be further specifications needed if you know that different data sources have different dates available. Run workflow.R up until # Write pecan.CHECKED.xml PEcAn.settings::write.settings(settings, outputfile = "pecan.CHECKED.xml") Once this section is run, you’ll need to open pecan.CHECKED.xml. You will notice that it has expanded from your original pecan.xml. <run> <settings.1> <site> <id>796</id> <met.start>2005/01/01</met.start> <met.end>2011/12/31</met.end> <name>Bartlett Experimental Forest (US-Bar)</name> <lat>44.06464</lat> <lon>-71.288077</lon> </site> <start.date>2005/01/01</start.date> <end.date>2011/12/31</end.date> <inputs> <met> <path>/fs/data1/pecan.data/dbfiles/AmerifluxLBL_SIPNET_site_0-796/AMF_US-Bar_BASE_HH_4-1.2005-01-01.2011-12-31.clim</path> </met> </inputs> </settings.1> <settings.2> <site> <id>767</id> <met.start>2001/01/01</met.start> <met.end>2014/12/31</met.end> <name>Morgan Monroe State Forest (US-MMS)</name> <lat>39.3231</lat> <lon>-86.4131</lon> </site> <start.date>2001/01/01</start.date> <end.date>2014/12/31</end.date> <inputs> <met> <path>/fs/data1/pecan.data/dbfiles/AmerifluxLBL_SIPNET_site_0-767/AMF_US-MMS_BASE_HR_8-1.2001-01-01.2014-12-31.clim</path> </met> </inputs> </settings.2> .... </run> The ... replaces the rest of the site settings for however many sites are within the site group. Looking at the example above, take a close look at the <met.start></met.start> and <met.end></met.end>. You will notice that for both sites, the dates are different. In this example they were edited by hand to include the dates that are available for that site and source. You must know your source prior. Only the source CRUNCEP has a check that will tell you if your dates are outside the range available. PEcAn will automatically populate these dates across sites according the original setting of start and end dates. In addition, you will notice that the <path></path> section contains the model specific meteorological data file. You can add that in by hand or you can you can leave the normal tags that met process workflow will use to process the data into your model specific format: <met> <source>AmerifluxLBL</source> <output>SIPNET</output> <username>pecan</username> </met> 12.2.5 (experimental) State Data Assimilation The following tags can be used for state data assimilation. More detailed information can be found here: State Data Assimilation Documentation <state.data.assimilation> <process.variance>TRUE</process.variance> <aqq.Init>1</aqq.Init> <bqq.Init>1</bqq.Init> <sample.parameters>FALSE</sample.parameters> <adjustment>TRUE</adjustment> <censored.data>FALSE</censored.data> <FullYearNC>TRUE</FullYearNC> <NC.Overwrite>FALSE</NC.Overwrite> <NC.Prefix>sipnet.out</NC.Prefix> <q.type>SINGLE</q.type> <free.run>FALSE</free.run> <Localization.FUN>Local.support</Localization.FUN> <scalef>1</scalef> <chains>5</chains> <state.variables> <variable> <variable.name>AbvGrndWood</variable.name> <unit>MgC/ha</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>LAI</variable.name> <unit></unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SoilMoistFrac</variable.name> <unit></unit> <min_value>0</min_value> <max_value>100</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>kg/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> </state.variables> <Obs_Prep> <Landtrendr_AGB> <AGB_input_dir>/projectnb/dietzelab/dongchen/Multi-site/download_500_sites/AGB</AGB_input_dir> <allow_download>TRUE</allow_download> <export_csv>TRUE</export_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </Landtrendr_AGB> <MODIS_LAI> <search_window>30</search_window> <export_csv>TRUE</export_csv> <run_parallel>TRUE</run_parallel> <timestep> <unit>year</unit> <num>1</num> </timestep> </MODIS_LAI> <SMAP_SMP> <search_window>30</search_window> <export_csv>TRUE</export_csv> <update_csv>FALSE</update_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </SMAP_SMP> <Soilgrids_SoilC> <timestep> <unit>year</unit> <num>1</num> </timestep> </Soilgrids_SoilC> <outdir>/projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS</outdir> <start.date>2012-07-15</start.date> <end.date>2021-07-15</end.date> </Obs_Prep> <spin.up> <start.date>2004/01/01</start.date> <end.date>2006/12/31</end.date> </spin.up> <forecast.time.step>1</forecast.time.step> <start.date>2004/01/01</start.date> <end.date>2006/12/31</end.date> </state.data.assimilation> process.variance : [optional] TRUE/FLASE flag for if process variance should be estimated (TRUE) or not (FALSE). If TRUE, a generalized ensemble filter will be used. If FALSE, an ensemble Kalman filter will be used. Default is FALSE. aqq.Init : [optional] The initial value of aqq used for estimate the Q distribution, the default value is 1 (note that, the aqq.init and bqq.init right now only work on the VECTOR q type, and we didn’t account for the variabilities of them across sites or variables, meaning we initialize the aqq and bqq given single value). bqq.Init : [optional] The initial value of bqq used for estimate the Q distribution, the default value is 1. sample.parameters : [optional] TRUE/FLASE flag for if parameters should be sampled for each ensemble member or not. This allows for more spread in the intial conditions of the forecast. adjustment : [optional] Bool variable decide if you want to adjust analysis results by the likelihood. censored.data : [optional] Bool variable decide if you want to do MCMC sampling for the forecast ensemble space, the default is FALSE. FullYearNC : [optional] Bool variable decide if you want to generate the full-year netcdf file when there is a overlap in time, the default is TRUE. NC.Overwrite : [optional] Bool variable decide if you want to overwrite the previous netcdf file when there is a overlap in time, the default is FALSE. NC.Prefix : [optional] The prefix for the generation of the full-year netcdf file, the default is sipnet.out. q.type : [optional] The type of process variance that will be estimated, the default is SINGLE. free.run : [optional] If it’s a free run without any observations, the default is FALSE. Localization.FUN : [optional] The localization function name for the localization operation, the default is Local.support. scalef : [optional] The scale parameter used for the localization operation, the smaller the value is, the sites are more isolated. chains : [optional] The number of chains needed to be estimated during the MCMC sampling process. NOTE: If TRUE, you must also assign a vector of trait names to pick.trait.params within the sda.enkf function. state.variable : [required] State variable that is to be assimilated (in PEcAn standard format, with pre-specified variable name, unit, and range). Four variables can be assimilated so far: including Aboveground biomass (AbvGrndWood), LAI, SoilMoistFrac, and Soil carbon (TotSoilCarb). Obs_Prep : [required] This section will be handled through the SDA_Obs_Assembler function, if you want to proceed with this function, this section is required. spin.up : [required] start.date and end.date for model spin up. NOTE: start.date and end.date are distinct from values set in the run tag because spin up can be done over a subset of the run. forecast.time.step : [optional] start.date and end.date for model spin up. start.date : [required?] start date of the state data assimilation (in YYYY/MM/DD format) end.date : [required?] end date of the state data assimilation (in YYYY/MM/DD format) NOTE: start.date and end.date are distinct from values set in the run tag because this analysis can be done over a subset of the run. 12.2.6 State Variables for State Data Assimilation The following tags can be used for documentation of state variables required for the SDA workflow. <state.data.assimilation> <state.variables> <variable> <variable.name>AbvGrndWood</variable.name> <unit>MgC/ha</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>LAI</variable.name> <unit></unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> <variable> <variable.name>SoilMoistFrac</variable.name> <unit></unit> <min_value>0</min_value> <max_value>100</max_value> </variable> <variable> <variable.name>TotSoilCarb</variable.name> <unit>kg/m^2</unit> <min_value>0</min_value> <max_value>9999</max_value> </variable> </state.variables> </state.data.assimilation> variable : [required] Each of this section should include each state variables separately. variable.name : [required] The name of the state variable (same as what we call it in the model output file. for example, “SIPNET.out” file). unit : [required] The unit of the state variable (can be empty if it’s unitless). min_value : [required] The minimum range of the state variable. max_value : [required] The maximum range of the state variable. 12.2.7 Observation Preparation for State Data Assimilation The following tags can be used for observation preparation for the SDA workflow. <state.data.assimilation> <Obs_Prep> <Landtrendr_AGB> <AGB_indir>/projectnb/dietzelab/dongchen/Multi-site/download_500_sites/AGB</AGB_indir> <allow_download>TRUE</allow_download> <export_csv>TRUE</export_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </Landtrendr_AGB> <MODIS_LAI> <search_window>30</search_window> <export_csv>TRUE</export_csv> <run_parallel>TRUE</run_parallel> <timestep> <unit>year</unit> <num>1</num> </timestep> </MODIS_LAI> <SMAP_SMP> <search_window>30</search_window> <export_csv>TRUE</export_csv> <update_csv>FALSE</update_csv> <timestep> <unit>year</unit> <num>1</num> </timestep> </SMAP_SMP> <Soilgrids_SoilC> <timestep> <unit>year</unit> <num>1</num> </timestep> </Soilgrids_SoilC> <outdir>/projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS</outdir> <start.date>2012-07-15</start.date> <end.date>2021-07-15</end.date> </Obs_Prep> </state.data.assimilation> The Obs_Prep section is specifically designed for the SDA_OBS_Assembler function within the state.data.assimilation section, which helps construct the obs.mean and obs.cov objects more efficiently. Within the Obs_Prep section, the user needs to specify data streams that will be proceeded in the assembler function as DataSource_AbbreviationOfData (for example: Landtrendr_AGB for Landtrendr aboveground biomass observations, or Soilgrids_SoilC for soilgrids soil organic carbon concentration). There are four functions that can be used for preparing observations: MODIS_LAI_prep, Soilgrids_SoilC_prep, SMAP_SMP_prep, and Landtrendr_AGB_prep functions. Other than that, the output directory, and start and end date need to be set within the Obs_Prep section, note that if you choose to export csv file for any data, you will get the corresponding csv file just under the outdir, for the Rdata of obs.mean and obs.cov objects, they will be stored whthin the Rdata folder within the outdir, if the Rdata folder is not created, then the assembler will help to create this folder automatically. * outdir : [required] This tag is for every observation preparation function, which points to the directory where the csv file associated with each data stream should be generated, read, or updated. * start.date : [required] This tag defines the exact start point of time, which differs from the start.date in the state.data.assimilation section in a way that it is the time when first observation should be taken. * end.date : [required] This tag defines the exact end point of time, which differs from the end.date in the state.data.assimilation section in a way that it is the time when last observation should be taken. Within each data-specific section (for example, within Landtrendr_AGB section), there are few general settings need to be set like the export_csv and timestep arguments. * export_csv : [optional] This tag is for every observation preparation function, which decide if we want to export the csv file to the outdir. Normally the outdir should be given in order to set export_csv as TRUE. The default value is TRUE. * timestep : [optional] This tag defines the time unit and how many unit time per each time step. It supports year, month, week, and day as time unit. If you assign a general timestep just under the Obs_Prep section, all variable will be handled with the same timestep. If you assign each variable with each timestep, then the assembler function will create obs.mean and obs.cov with mixed temporal resolution. The default value is list(unit=“year”, num=1). There are also several settings that are data-specific, for example, the AGB_indir only applies to Landtrendr AGB data that points to the directory where the AGB files are stored. Beyond that, the allow_download is also AGB specific, which decide if you want to download the AGB files if some are missing. * AGB_indir : [required] This tag is only for Landtrendr_AGB_prep function, which points to the directory where the pre-downloaded AGB files existed. * allow_download : [optional] This tag is only for Landtrendr_AGB_prep function, which decide if we want to proceed to download the AGB files (that will take a while, and a lot of space) if some of them are detected to be empty. The default value is FALSE. For the MODIS LAI and SMAP soil moisture observations, the search_window specifies how many days you want to search for the data with desired dates. Beyond that, the update_csv is specifically used for the SMAP data, which decide if you want to reproduce formatted csv file based on the downloaded SMAP_gee.csv file. For the run_parallel in MODIS LAI settings, it specify if you want to proceed the function parallely. * search_window : [optional] This tag is used for LAI and SMAP observation preparation functions, which defines the search window within which the closest observation to the target date is selected. The default value is 30 days. * update_csv : [optional] This tag is used for SMAP preparation function only, which decide if we want to update the CSV file. Normally, we will only set it to be TRUE if we redownload the SMAP_GEE csv file from Google Earth Engine, the tutorial can be found in the SMAP_SMP_prep function itself. The default value is FALSE. * run_parallel : [optional] This tag defines if you want to proceed the MODIS LAI function parallely, the default value is FALSE. 12.2.8 (experimental) Brown Dog This section describes how to connect to Brown Dog. This facilitates processing and conversions of data. <browndog> <url>...</url> <username>...</username> <password>...</password> </browndog> url: (required) endpoint for Brown Dog to be used. username: (optional) username to be used with the endpoint for Brown Dog. password: (optional) password to be used with the endpoint for Brown Dog. This information is currently used by the following R functions: PEcAn.data.atmosphere::met.process – Generic function for processing meteorological input data. PEcAn.benchmark::load_data – Generic, versatile function for loading data in various formats. 12.2.9 (experimental) Benchmarking Coming soon… 12.2.10 Remote data module This section describes the tags required for configuring remote_process. <remotedata> <out_get_data>...</out_get_data> <source>...</source> <collection>...</collection> <scale>...</scale> <projection>...</projection> <qc>...</qc> <algorithm>...</algorithm> <credfile>...</credfile> <out_process_data>...</out_process_data> <overwrite>...</overwrite> </remotedata> out_get_data: (required) type of raw output requested, e.g, bands, smap source: (required) source of remote data, e.g., gee or AppEEARS collection: (required) dataset or product name as it is provided on the source, e.g. “COPERNICUS/S2_SR” for gee or “SPL3SMP_E.003” for AppEEARS scale: (optional) pixel resolution required for some gee collections, recommended to use 10 for Sentinel 2 projection: (optional) type of projection. Only required for AppEEARS polygon AOI type qc: (optional) quality control parameter, required for some gee collections overwrite: (optional) if TRUE database checks will be skipped and existing data of same type will be replaced entirely. When processed data is requested, the raw data required for creating it will also be replaced. By default FALSE These tags are only required if processed data is requested: out_process_data: (optional) type of processed output requested, e.g, LAI algorithm: (optional) algorithm used for processing data, currently only SNAP is implemented to estimate LAI from Sentinel-2 bands credfile: (optional) absolute path to JSON file containing Earthdata username and password, only required for AppEEARS pro_mimetype: (optional) MIME type of the processed file pro_formatname: (optional) format name of the processed file Additional information for the module are taken from the registration files located at data.remote/inst/registration The output data from the module are returned in the following tags: raw_id: input id of the raw file raw_path: absolute path to the raw file pro_id: input id of the processed file pro_path: absolute path to the processed file "],["workflow.html", "13 PEcAn workflow (web/workflow.R)", " 13 PEcAn workflow (web/workflow.R) How the workflow works How each module is called How to do outside of web interface Link to “folder structure” section below for detailed descriptions "],["workflow-readsettings.html", "13.1 Read Settings", " 13.1 Read Settings (TODO: Under construction…) "],["workflow-input.html", "13.2 Input Conversions", " 13.2 Input Conversions "],["workflow-input-data.html", "13.3 Input Data", " 13.3 Input Data Models require input data as drivers, parameters, and boundary conditions. In order to make a variety of data sources that have unique formats compatible with models, conversion scripts are written to convert them into a PEcAn standard format. That format is a netcdf file with variables names and specified to our standard variable table. Within the PEcAn repository, code pertaining to input conversion is in the MODULES directory under the data.atmosphere and data.land directories. "],["workflow-input-initial.html", "13.4 Initial Conditions", " 13.4 Initial Conditions To convert initial condition data into the PEcAn Standard and then into the model formats we follow three main steps: Downloading vegetation info -converting the vegetation info into .rd file format Creating ensemble members based on veg file -resamples veg file to create ensmeble members Converting ensemble members to model specific format Common Questions regarding IC Data: How do I know if my data product is supported by ic_process workflow? -The current data products supported by ic_process are: NEON, FIA, and BADM. That said you if your vegetation files are registered with BETY and abide by the BETY formats and column names you will be able to use ic_process to create ensemble members and convert the ensemble members to model specific format. See section 6.3.3 for more info. Can I use ic_process outside of the PEcAn workflow? -Short answer is yes, ic_process can be called as a function all by itself outside of do_conversions. What about soil initial conditions? -Please reference section 6.3.6 for more info. ic_process workflow: Required inputs for ic_process are: settings object with and sections (see pecan.xml section for more info), dir or location where you would to store the output files, input object that contains all info from settings\\(run\\)inputs$poolinitcond, and the overwrite setting the default is overwrite = FALSE. get_veg_module() This function will either download your vegetation info from: NEON_veg, FIA, BADM sources or query BETY for existing veg files previously downloaded. The input and dbfile ids of your veg file are then added to a list getveg.id. ens_veg_module() This function will create ensemble member ncdf files by resampling the veg file supplied in getveg.id. put_veg_module() This function will convert the ensemble member ncdf files into model specific format. Currently the supported models are ED2 and SIPNET. "],["workflow-met.html", "13.5 Meteorological Data", " 13.5 Meteorological Data To convert meterological data into the PEcAn Standard and then into model formats we follow four main steps: Downloading raw data - Currently supported products - Example Code Converting raw data into a CF standard - Example Code Downscaling and gapfilling - Example Code Coverting to Model Specific format - Example Code Common Questions regarding Met Data: How do I add my Meterological data product to PEcAn? How do I use PEcAn to convert Met data outside the workflow? The main script that handles Met Processing, is met.process. It acts as a wrapper function that calls individual modules to facilitate the processing of meteorological data from it’s original form to a pecan standard, and then from that standard to model specific formats. It also handles recording these processes in the BETY database. Downloading raw data - Available Meteorological Drivers - Example Code to download Ameriflux data Converting raw data into a CF standard (if needed) - Example Code to convert from raw csv to CF standard Downscaling and gapfilling(if needed) - Example Code to gapfill Coverting to Model Specific format - Example Code to convert Standard into Sipnet format 13.5.1 Downloading Raw data (Description of Process) Given the information passed from the pecan.xml met.process will call the download.raw.met.module to facilitate the execution of the necessary functions to download raw data. <met> <source>AmerifluxLBL</source> <output>SIPNET</output> <username>pecan</username> </met> 13.5.2 Converting raw data to PEcAn standard 13.5.3 Downscaling and gapfilling (optional) 13.5.4 Converting from PEcAn standard to model-specific format "],["workflow-traits.html", "13.6 Traits", " 13.6 Traits (TODO: Under construction) "],["workflow-metaanalysis.html", "13.7 Meta Analysis", " 13.7 Meta Analysis (TODO: Under construction) "],["workflow-modelconfig.html", "13.8 Model Configuration", " 13.8 Model Configuration (TODO: Under construction) "],["workflow-modelrun.html", "13.9 Run Execution", " 13.9 Run Execution (TODO: Under construction) "],["workflow-postrun.html", "13.10 Post Run Analysis", " 13.10 Post Run Analysis (TODO: Under construction) ## Advanced Analysis {#workflow-advanced} (TODO: Under construction) "],["pecan-models.html", "14 PEcAn Models", " 14 PEcAn Models This section will contain information about all models and output variables that are supported by PEcAn. Model Name Available in the VM Prescribed Inputs Input Functions/Values Restart Function BioCro Yes Yes Yes No CLM No No No No DALEC Yes Yes Yes No ED2 Yes Yes Yes Yes FATES No Yes No GDAY No No No No LDNDC No Yes No No LINKAGES Yes Yes Yes Yes LPJ-GUESS No Yes No No MAESPA Yes Yes No No PRELES Yes Yes Partially No SiPNET Yes Yes Yes Yes STICS Yes Yes No No Available in the VM - Denotes if a model is publicly available with PEcAn. Prescribed Inputs - Denotes whether or not PEcAn can prescribe inputs. Input Functions/Values - Denotes whether or not PEcAn has functions to fully produce a model’s Input values. Restart Function - Denotes status of model data assimilation capabilities. Output Variables PEcAn converts all model outputs to a single Output Standards. This standard evolved out of MsTMIP project, which is itself based on NACP, LBA, and other model-intercomparison projects. This standard was expanded for the PalEON MIP and the needs of the PEcAn modeling community to support variables not in these standards. Model developers: do not add variables to your PEcAn output without first adding them to the PEcAn standard table! Also, do not create new variables equivalent to existing variables but just with different names or units. "],["models-biocro.html", "14.1 BioCro", " 14.1 BioCro Model Information Home Page https://github.com/ebimodeling/biocro/blob/0.951/README.md Source Code https://github.com/ebimodeling/biocro/tree/0.951 License University of Illinois/NCSA Open Source License Authors Fernando E. Miguez, Deepak Jaiswal, Justin McGrath, David LeBauer, Scott Rohde, Dan Wang PEcAn Integration David LeBauer, Chris Black, Kristina Riemer 14.1.1 Introduction BioCro is a model that estimates photosynthesis at the leaf, canopy, and ecosystem levels and determines plant biomass allocation and crop yields, using underlying physiological and ecological processes to do so. 14.1.2 PEcAn configuration file additions The following sections of the PEcAn XML are relevant to the BioCro model: model revision – Model version number run site/id – ID associated with desired site from BETYdb site entry inputs met/output – Set as BIOCRO met/path – Path to file containing meteorological data 14.1.3 Model specific input files List of inputs required by model, such as met, etc. 14.1.4 Model configuration files Genus-specific parameter files are secretly required. These are stored in the PEcAn.BIOCRO package and looked up under the hood. write.configs.BIOCRO looks for defaults in this order: first any file at a path specified by settings$pft$constants$file, next by matching the genus name in datasets exported by the BioCro package, and last by matching the genus name in PEcAn.BIOCRO’s extdata/defaults directory. When adding a new genus, it is necessary to provide a new default parameter file in PEcAn.BIOCRO inst/extdata/defaults and also (for v<1.0) update the call_biocro() function. BioCro uses a config.xml file similar to ED2. At this time, no other template files are required. 14.1.5 Installation notes BioCro can be run standalone using the model’s R package. Instructions for installing and using the package are in the GitHub repo’s README file. PEcAn works with v. 0.9x and 1.x. Support for v 2.x is not implemented. Support for v 0.9x is most robust and tested. To install, use remotes::install_github('ebimodeling/biocro@0.951') "],["models-clm.html", "14.2 CLM", " 14.2 CLM Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-dalec.html", "14.3 DALEC", " 14.3 DALEC Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-ed.html", "14.4 ED2", " 14.4 ED2 Model Information Home Page http://moorcroftlab.oeb.harvard.edu/ Source Code https://github.com/EDmodel/ED2 License Authors Paul Moorcroft, … PEcAn Integration Michael Dietze, Rob Kooper 14.4.1 Introduction Introduction about ED model 14.4.2 PEcAn configuration file additions The following sections of the PEcAn XML are relevant to the ED model: model id – BETY model ID. Each ID corresponds to a different revision of ED (see below) revision – The revision (a.k.a. release) number of ED (e.g. “r82”). “rgit” indicates the latest code on the ED repository. edin – Name of the template ED2IN configuration file. If this is a functional path that points to a specific file, that file is used. If no file is found following the path literally, the workflow will try the path relative to the PEcAn.ED2 package using system.file (recall that files in inst are moved in the package root, but all other directory structure is preserved). If this is omitted, PEcAn.ED2::write.configs.ED2 will look for a file called ED2IN.<revision> (e.g. ED2IN.rgit, ED2IN.r86) in the PEcAn.ED2 package. Example: <edin>ED2IN.rgit</edin> will use the ED2IN.rgit file shipped with PEcAn.ED2 regardless of the revision of ED used. (Note however that if a file called ED2IN.rgit exists in the workflow runtime directory, that file will be used instead). start.date, end.date – Run start and end date, respectively met.start, met.end – Start and end year of meteorology inputs. By default (if omitted), these are set to the years of start.date and end.date, respectively. Setting these values to a shorter interval than start.date and end.date will cause ED to loop the meteorology input over the specified years. This may be useful for, for example, spinning up ED under a set of constant climate conditions. phenol.scheme phenol phenol.start phenol.end all_pfts – (Logical) If false or missing (default), only run ED2 with the PFTs configured via PEcAn (i.e. in the <pfts> section of the XML). If true, run with all 17 of ED2’s PFTs, using ED2’s internal default parameters for all PFTs not configured through PEcAn. See below for more details. ed2in_tags – Named list of additional tags in the ED2IN to be modified. These modifications override any of those set by other parts of the PEcAn workflow. These tags must be in all caps. Any tags that are not already in the ED2IN file will be added; this makes this an effective way to run newer versions of ED2 that have new ED2IN parameters without having to provide an entire new ED2IN. For example: <model> <ed2in_tags> <IOOUTPUT>0</IOOUTPUT> <PLANT_HYDRO_SCHEME>0</PLANT_HYDRO_SCHEME> <ISTOMATA_SCHEME>0</ISTOMATA_SCHEME> <ISTRUCT_GROWTH_SCHEME>0</ISTRUCT_GROWTH_SCHEME> <TRAIT_PLASTICITY_SCHEME>0</TRAIT_PLASTICITY_SCHEME> </ed2in_tags> </model> barebones_ed2in – Whether or not to try to annotate the ED2IN file with comments. If “true”, skip all comments and only write the tags themselves. If “false” (default), try to transfer comments from the template file into the target file. jobtemplate prerun – String of commands to be added to the job.sh model execution script before the model is run. Multiple commands should be separated by proper bash syntax – i.e. either with && or ;. One common use of this argument is to load modules on some HPC systems – for instance: <prerun>module load git hdf5</prerun> If your particular version of ED is failing early during execution with a mysterious “Segmentation fault”, that may indicate that its process is exceeding its stack limit. In this case, you may need to remove the stack limit restriction with a prerun command like the following: <prerun>ulimit -s unlimited</prerun> postrun – Same as <prerun>, but for commands to be run after model execution. binary – The full path to the ED2 binary on the target machine. binary_args – Additional arguments to be passed to the ED2 binary. Some common arguments are: -s – Delay OpenMPI initialization until the last possible moment. This is needed when running ED2 in a Docker container. It is included by default when the host is rabbitmq. -f /path/to/ED2IN – Full path to a specific ED2IN namelist file. Typically, this is not needed because, by default, ED searches for the ED2IN in the current directory and the PEcAn workflow places the ED2IN file and a symbolic link to the ED executable in the same (run) directory for you. run/site lat – Latitude coordinate of site lon – Longitude coordinate of site inputs met/path – Path to ED_MET_DRIVER_HEADER file pss: [required] location of patch file css: [required] location of cohort file site: [optional] location of site file lu: [required] location of land use file thsums: [required] location of thermal sums file veg: [required] location of vegetation data soil: [required] location of soil data 14.4.3 PFT configuration in ED2 ED2 has more detailed PFTs than many models, and a more complex system for configuring these PFTs. ED2 has 17 PFTs, based roughly on growth form (e.g. tree vs. grass), biome (tropical vs. temperate), leaf morphology (broad vs. needleleaf), leaf phenology (evergreen vs. deciduous), and successional status (e.g. early, mid, or late). Each PFT is assigned an integer (1-17), which is used by the ED model to assign default model parameters. The mappings of these integers onto PFT definitions are not absolute, and may change as the ED2 source code evolves. Unfortunately, the only authoritative source for these PFT definitions for any given ED2 version is the Fortran source code of that version. The following is the mapping as of ED2 commit 24e6df6a (October 2018): C4 grass Early-successional tropical Mid-successional tropical Late-successional tropical Temperate C3 grass Northern pine Southern pine Late-successional conifer Early-successional temperate deciduous Mid-successional temperate deciduous Late-successional temperate deciduous Agricultural (crop) 1 Agricultural (crop) 2 Agricultural (crop) 3 Agricultural (crop) 4 Subtropical C3 grass (C4 grass with C3 photosynthesis) “Araucaria” (non-optimized southern pine), or liana ED2 parameter defaults are hard-coded in its Fortran source code. However, most parameters can be modified via an XML file (determined by the ED2IN IEDCNFGF field; usually config.xml in the same directory as the ED2IN file). The complete record of all parameters (defaults and user overrides) used by a given ED2 run is stored in a history.xml file (usually in the same directory as the ED2IN) – this is the file to check to make sure that an ED2 run is parameterized as you expect. As with other models, PEcAn can set ED2 parameters using its built-in trait meta analysis. The function specifically responsible for writing the config.xml is PEcAn.ED2::write.config.xml.ED2 (which is called as part of the more general PEcAn.ED2::write.config.ED2). The configuration process proceeds as follows: First, the mappings between PEcAn PFT names and ED2 PFT numbers are determined according to the following logic: If the PFT has a <ed2_pft_number> XML tag, that number is used. For example: <pft> <name>umbs.early_hardwood</name> <ed2_pft_number>9</ed2_pft_number> </pft> If <ed2_pft_number> is not provided, the code tries to cross-reference the PFT name against the pftmapping.csv data provided with the PEcAn.ED2 package. If the name is not matched (perfectly!), the function will exit with an error. Second, the PFT number from the previous step is used to write that PFT’s parameters to the config.xml. The order of precedence for parameters is as follows (from highest to lowest): Explicit user overrides. These are specified via a <constants> tag in the PFT definition in the pecan.xml. For example: <pft> <name>umbs.early_hardwood</name> <constants> <sla>36.3</sla> </constants> </pft> Note that these values are passed through PEcAn.ED2::convert.samples.ED, so they should generally be given in PEcAn’s default units rather than ED2’s. Samples from the PEcAn meta analysis. These are also converted via PEcAn.ED2::convert.samples.ED. ED2 defaults that PEcAn knows about. These are stored in the edhistory.csv file inside of PEcAn.ED2 This file is re-generated manually whenever there is a new version of ED2, so while we try our best to keep it up to date, there is no guarantee that it is. (Implicitly) Defaults in the ED2 Fortran source code. In general, our goal is to set all parameters through PEcAn (via steps 1-3), but if you are running PEcAn with new or experimental versions of ED2, you should be extra careful to make sure ED2 is running with the parameters you intend. Again, the best way to know which parameters ED2 is actually using is to check the history.xml file produced once the run starts. The ED2IN field INCLUDE_THESE_PFT controls which of these PFTs are included in a given ED2 run. By default, PEcAn will set this field to only include the PFTs specified by the user. This is the recommended behavior because it ensures that all PFTs in ED2 were parameterized (one way or another, at least partially) through PEcAn. However, if you would like ED2 to run with all 17 PFTs (NOTE: using ED2’s internal defaults for all PFTs not specified by the user!), you can set the <all_pfts> XML tag (in the <model> section) to true: <model> <all_pfts>true</all_pfts> </model> 14.4.4 Model specific input files List of inputs required by model, such as met, etc. 14.4.5 Model configuration files ED2 is configured using 2 files which are placed in the run folder. ED2IN : template for this file is located at models/ed/inst/ED2IN.<revision>. The values in this template that need to be modified are described below and are surrounded with @ symbols. config.xml : this file is generated by PEcAn. Some values are stored in the pecan.xml in <pfts><pft><constants> section as well as in <model> section. An example of the template can be found in ED2IN.r82 The ED2IN template can contain the following variables. These will be replaced with actual values when the model configuration is written. **@ENSNAME@** : run id of the simulation, used in template for NL%EXPNME **@START_MONTH@** : start of simulation UTC time, from <run><start.date>, used in template for NL%IMONTHA **@START_DAY@** : start of simulation UTC time, from <run><start.date>, used in template for NL%IDATEA **@START_YEAR@** : start of simulation UTC time, from <run><start.date>, used in template for NL%IYEARA **@END_MONTH@** : end of simulation UTC time, from <run><end.date>, used in template for NL%IMONTHZ **@END_DAY@** : end of simulation UTC time, from <run><end.date>, used in template for NL%IDATEZ **@END_YEAR@** : end of simulation UTC time, from <run><end.date>, used in template for NL%IYEARZ **@SITE_LAT@** : site latitude location, from <run><site><lat>, used in template for NL%POI_LAT **@SITE_LON@** : site longitude location, from <run><site><lon>, used in template for NL%POI_LON **@SITE_MET@** : met header location, from <run><site><met>, used in template for NL%ED_MET_DRIVER_DB **@MET_START@** : first year of met data, from <run><site><met.start>, used in template for NL%METCYC1 **@MET_END@** : last year of met data, from <run><site><met.end>, used in template for NL%METCYCF **@PHENOL_SCHEME@** : phenology scheme, if this variabe is 1 the following 3 fields will be used, otherwise they will be set to empty strings, from <model><phenol.scheme>, used in template for NL%IPHEN_SCHEME **@PHENOL_START@** : first year for phenology, from <model><phenol.start>, used in template for NL%IPHENYS1 and NL%IPHENYF1 @PHENOL_END@** : last year for phenology, from <model><phenol.end>, used in template for NL%IPHENYSF and NL%IPHENYFF @PHENOL@** : path and prefix of the prescribed phenology data, from * <model><phenol>, used in template for NL%PHENPATH **@SITE_PSSCSS@** : path and prefix of the previous ecosystem state, from <model><psscss>, used in template for NL%SFILIN **@ED_VEG@** : path and prefix of the vegetation database, used only to determine the land/water mask, from <model><veg>, used in template for NL%VEG_DATABASE **@ED_SOIL@** : path and prefix of the soil database, used to determine the soil type, from <model><soil>, used in template for NL%SOIL_DATABASE **@ED_INPUTS@** : input directory with dataset to initialise chilling degrees and growing degree days, which is used to drive the cold-deciduous phenology, from <model><inputs>, used in template for NL%THSUMS_DATABASE **@FFILOUT@** : path and prefix for analysis files, generated from <run><host><outdir>/run.id/analysis, used in template for NL%FFILOUT **@SFILOUT@** : path and prefix for history files, generated from <run><host><outdir>/run.id/history, used in template for NL%SFILOUT **@CONFIGFILE@** : XML file containing additional parameter settings, this is always “config.xml”, used in template for NL%IEDCNFGF @OUTDIR@** : location where output files are written (without the runid**), from <run><host><outdir>, should not be used. **@SCRATCH@** : local scratch space for outputs, generated /scratch/<username>/run$scratch, should not be used right now since it only works on ebi-cluster 14.4.6 Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. 14.4.6.1 VM 14.4.6.2 BU geo 14.4.6.3 TACC lonestar module load hdf5 curl -o ED.r82.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r82.tgz tar zxf ED.r82.tgz rm ED.r82.tgz cd ED.r82/ED/build/bin curl -o include.mk.lonestar http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.lonestar make OPT=lonestar 14.4.6.4 TACC stampede module load hdf5 curl -o ED.r82.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r82.tgz tar zxf ED.r82.tgz rm ED.r82.tgz cd ED.r82/ED/build/bin curl -o include.mk.stampede http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.stampede make OPT=stampede "],["models-gday.html", "14.5 GDAY", " 14.5 GDAY Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-ldndc.html", "14.6 LDNDC", " 14.6 LDNDC Model Information Home Page https://ldndc.imk-ifu.kit.edu/about/model.php Source Code License Authors Prof. Dr. Klaus Butterbach-Bahl, Dr. Edwin Haas, … PEcAn Integration Henri Kajasilta 14.6.1 Introduction LandscapeDNDC is designed to simulate biosphere/hydrosphere processes of forest, arable and grassland ecosystems. It is modularly designed and therefore allows for various model selections to be plugged in. Most up-to-date information from LDNDC can be found here, as well as the authors, users guide and documentation of the model: https://ldndc.imk-ifu.kit.edu/index.php PEcAn configuration file additions The following sections of the PEcAn XML are relevant or have influence at the moment: ensemble variable: Options are only LAI and GPP model id: Corresponding the id of LDNDC in BETY run inputs start_date: Start date of the simulation end_date: End date of the simulation 14.6.2 Model specific input files LDNDC takes several files as an input, and can also generate multiple output files. These ńotes regarding input files are written from the PEcAn integration point of view, and detailed information about input files can be found from the user guide, which is available via the above links. Input files: Project — Contains essential information in order to set-up the simulation run. For example, the file contains information of other files which will be used in the simulations as well as the time period when the simulations are taking a place. Setup — Defines which modules are used. Climate — Met data that is used in simulations. Speciesparameters — Species used in the simulations should be defined here and the file holds the approppriate information of the used species. Siteparameters — Works similarly as species parameters, but in the site features point of view. Event — Holds information about the events, which are essential in arable simulations. Site — Specific information about the site. Airchemistry — Information about the airchemistry. 14.6.2.1 Model configuration files Due to the amount of different input files, there are several templates which are used in model configuration. Templates are located at models/ldndc/inst/ and are copied to the simulation’s run directory before the model run. project.ldndc : Project file is populated in write.configs.LDNDC based on the inputs of the PEcAn run. In addition to having the correct input files that are used in the simulation, the project file is filled in with the simulation time period and the directory paths regarding other input files that are read during the simulation. The file also sets the path for output files. These other input files are found from the same directory as the project file when starting the model run, and the output file is created among the other outputs of the simulation. At the moment, simulations are executed half-hourly. template.job : Will be filled in with the information that model execution requires in PEcAn. Calls for example models binary and executes the project.ldndc file. events_template.xml : Has three events (plant, manure and cut), which are filled in in write.configs.LDNDC. Events should be generated based on some event file, but such implementation has not been applied yet. This template is only meant for testing for now. speciesparameter_template.xml : Has one replacable section. This section is filled in within write.configs.LDNDC, and is currently just hard-coded values for perennial grass species. This will be one of the key files, because the used pfts and corresponding pft values are written here. At this stage, it is hard-coded to only have one pft, but it should be generated based on the trait values that are available from BETY database. siteparameters.xml : Some default value for siteparameters. site.xml : Some default values for site. airchemistry.txt : Some default data for airchemistry. climate.txt : This file is created from the netcdf file. A symbolic link will be created into the run directory for it. setup.xml : Contains information which modules the model simulation is using. Default settings should fit for most of the purposes and these are currently hard-coded. 14.6.3 Installation notes Obtaining LDNDC program requires credentials, which the user is able to request from the developers of the model. With the credentials, the pre-compiled LDNDC program can be downloaded here: https://ldndc.imk-ifu.kit.edu/download/download-model.php After obtaining the necessary files, the user should execute the install script that is found from the ldndc-‘version-number’ directory. With linux, executing would happen with command sh install.sh. Succesfull installation will create a .ldndc directory to the user’s home directory. Running the simulations happens by calling the ldndc executable (found from the /bin directory) and giving the path file to the project file that contains the specs of the simulation. Instructions in detail and how to play with these setups are explained more broadly in the user guide. "],["models-linkages.html", "14.7 LINKAGES", " 14.7 LINKAGES Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-lpjguess.html", "14.8 LPJ-GUESS", " 14.8 LPJ-GUESS Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-maespa.html", "14.9 MAESPA", " 14.9 MAESPA Model Information Home Page http://maespa.github.io/ Source Code http://maespa.github.io/download.html License Authors Belinda Medlyn and Remko Duursma PEcAn Integration Tony Gardella, Martim DeKauwe, Remki Duursma Introduction PEcAn configuration file additions Model specific input files Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes Installing the MAESPA model requires cloning the MAESPA Bitbucket Repository, executing the makefile, and ensuring that the Maeswarp R package is correctly installed. To clone and compile the model, execute this code at the command line git clone https://bitbucket.org/remkoduursma/maespa.git cd maespa make clean make maespa.out is your executable. Example input files can be found in the inputfiles directory. Executing measpa.out from within one of the example directories will produce output. MAESPA developers have also developed a wrapper package called Maeswrap. The usual R package installation method install.packages may present issues with downloading an unpacking a dependency package called rgl. Here are a couple of solutions: Solution 1 From the Command Line sudo apt-get install r-cran-rgl then from within R install.packages("Maeswrap") Solution 2 From the Command line sudo apt-get install libglu1-mesa-dev then from within R install.packages("Maeswrap") This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-preles.html", "14.10 PRELES", " 14.10 PRELES Model Information Home Page Source Code License Authors PEcAn Integration Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files MODEL is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. file1 : template for this file is located at models/MODEL/inst/file1 and is not modified. file2 : template for this file is located at models/MODEL/inst/file2 and is not modified. file3 : template for this file is in models/MODEL/inst/file3 or it is specified in the <model> section as <template>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. VM "],["models-sipnet.html", "14.11 SiPNET", " 14.11 SiPNET Model Information Home Page Source Code License Authors PEcAn Integration Michael Dietze, Rob Kooper Introduction Introduction about model PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files SIPNET is configured using 3 files which are placed in the run folder, as well as a symbolic link to the met file. sipnet.in : template for this file is located at models/sipnet/inst/sipnet.in and is not modified. sipnet.param-spatial : template for this file is located at models/sipnet/inst/template.param-spatial and is not modified. sipnet.param : template for this file is in models/sipnet/inst/template.param or it is specified in the <model> section as <default.param>. The values in this template are replaced by those computed in the earlier stages of PEcAN. Installation notes This section contains notes on how to compile the model. The notes for the VM might work on other machines or configurations as well. SIPNET version unk: if [ ! -e ${HOME}/sipnet_unk ]; then cd curl -o sipnet_unk.tar.gz http://isda.ncsa.illinois.edu/~kooper/PEcAn/models/sipnet_unk.tar.gz tar zxf sipnet_unk.tar.gz rm sipnet_unk.tar.gz fi cd ${HOME}/sipnet_unk/ make clean make sudo cp sipnet /usr/local/bin/sipnet.runk make clean SIPNET version 136: if [ ! -e ${HOME}/sipnet_r136 ]; then cd curl -o sipnet_r136.tar.gz http://isda.ncsa.illinois.edu/~kooper/EBI/sipnet_r136.tar.gz tar zxf sipnet_r136.tar.gz rm sipnet_r136.tar.gz sed -i 's#$(LD) $(LIBLINKS) \\(.*\\)#$(LD) \\1 $(LIBLINKS)#' ${HOME}/sipnet_r136/Makefile fi cd ${HOME}/sipnet_r136/ make clean make sudo cp sipnet /usr/local/bin/sipnet.r136 make clean VM "],["models-stics.html", "14.12 STICS", " 14.12 STICS Model Information Home Page https://www6.paca.inrae.fr/stics/ Source Code License Authors PEcAn Integration Istem Fer Introduction STICS (Simulateur mulTIdisciplinaire pour les Cultures Standard) is a crop model that has been developed since 1996 at INRA (French National Institute for Agronomic Research) in collaboration with other research (CIRAD, Irstea, Ecole des Mines de Paris, ESA, LSCE) or professional (ARVALIS, Terres Inovia, CTIFL, ITV, ITB, Agrotransferts, etc.) and teaching institutes or organizations. PEcAn configuration file additions Should list the model specific additions to the PEcAn file here Model specific input files List of inputs required by model, such as met, etc. Model configuration files STICS is configured using different XML files located in two fixed directories: config, plant and user defined workspace(s) directorie(s). A java app called JavaStics allows users to generate these files. Installation notes The software (JavaStics interface and STICS model) is available for download after a registration procedure (see procedure at http://www6.paca.inra.fr/stics_eng/ Download). VM "],["met-drivers.html", "15 Available Meteorological Drivers ", " 15 Available Meteorological Drivers "],["ameriflux.html", "15.1 Ameriflux", " 15.1 Ameriflux Scale: site Resolution: 30 or 60 min Availability: varies by site http:\\/\\/ameriflux.lbl.gov\\/data\\/data-availability\\/ Notes: Old ORNL server, use is deprecated "],["amerifluxlbl.html", "15.2 AmerifluxLBL", " 15.2 AmerifluxLBL Scale: site Resolution: 30 or 60 min Availability: varies by site http:\\/\\/ameriflux.lbl.gov\\/data\\/data-availability\\/ Notes: new Lawrence Berkeley Lab server "],["fluxnet2015.html", "15.3 Fluxnet2015", " 15.3 Fluxnet2015 Scale: site Resolution: 30 or 60 min Availability: varies by site http://fluxnet.fluxdata.org/sites/site-list-and-pages Notes: Fluxnet 2015 synthesis product. Does not cover all FLUXNET sites "],["narr.html", "15.4 NARR", " 15.4 NARR Scale: North America Resolution: 3 hr, approx. 32km \\(Lambert conical projection\\) Availability: 1979-present "],["cruncep.html", "15.5 CRUNCEP", " 15.5 CRUNCEP Scale: global Resolution: 6hr, 0.5 degree Availability: 1901-2010 "],["cmip5.html", "15.6 CMIP5", " 15.6 CMIP5 Scale: varies by model Resolution: 3 hr Availability: 2006-2100 Currently only GFDL available. Different scenerios and ensemble members can be set via Advanced Edit. "],["nldas.html", "15.7 NLDAS", " 15.7 NLDAS Scale: Lower 48 + buffer, Resolution: 1 hour, .125 degree Availability: 1980-present "],["gldas.html", "15.8 GLDAS", " 15.8 GLDAS Scale: Global Resolution: 3hr, 1 degree Availability: 1948-2010 "],["paleon.html", "15.9 PalEON", " 15.9 PalEON Scale: -100 to -60 W Lon, 35 to 50 N Latitude \\(US northern hardwoods + buffer\\) Resolution: 6hr, 0.5 degree Availability: 850-2010 "],["fluxnetlathuile.html", "15.10 FluxnetLaThuile", " 15.10 FluxnetLaThuile Scale: site Resolution: 30 or 60 min Availability: varies by site http:\\/\\/www.fluxdata.org\\/DataInfo\\/Dataset%20Doc%20Lib\\/SynthDataSummary.aspx Notes: 2007 synthesis. Fluxnet2015 supercedes this for sites that have been updated "],["geostreams.html", "15.11 Geostreams", " 15.11 Geostreams Scale: site Resolution: varies Availability: varies by site Notes: This is a protocol, not a single archive. The PEcAn functions currently default to querying [https://terraref.ncsa.illinois.edu/clowder/api/geostreams], which requires login and contains data from only two sites (Urbana IL and Maricopa AZ). However the interface can be used with any server that supports the Geostreams API. "],["era5.html", "15.12 ERA5", " 15.12 ERA5 Scale: Global Resolution: 3 hrs and 31 km Availability: 1950-present Notes: It’s important to know that the raw ERA5 tiles needs to be downloaded and registered in the database first. Inside the inst folder in the data.atmosphere package there are R files for downloading and registering files in the BETY. However, it assumes that you have registered and setup your API requirements. Check out how to setup your API [here] (https://confluence.ecmwf.int/display/CKB/How+to+download+ERA5#HowtodownloadERA5-3-DownloadERA5datathroughtheCDSAPI). In the inst folder you can find two files (ERA5_db_register.R and ERA5_USA_download.R). If you setup your ecmwf account as it’s explained in the link above, ERA5_USA_download.R will help you to download all the tiles with all the variables required for pecan extract.nc.ERA5 function to generate pecan standard met files. Besides installing the required packages for this file, it should work from top to bottom with no problem. After downloading the tiles, there is simple script in ERA5_db_register.R which helps you register your tiles in the bety. met.process later on uses that entery to find the required tiles for extracting met data for your sites. There are important points about this file. 1- Make sure you don’t change the site id in the script (which is the same the ParentSite in ERA5 registeration xml file). 2- Make sure the start and end date in that script matches the downloaded tiles. Set your ERA5.files.path to where you downloaded the tiles and then the rest of the script should be working fine. "],["icos-drought-2018.html", "15.13 ICOS Drought 2018", " 15.13 ICOS Drought 2018 Scale: site Resolution: 30 min Availability: Varies by site Notes: To use this option, set source as ICOS and a product tag containing drought2018 in pecan.xml "],["icos-ecosystem-archive.html", "15.14 ICOS Ecosystem Archive", " 15.14 ICOS Ecosystem Archive Scale: site Resolution: 30 min Availability: Varies by site Notes: To use this option, set source as ICOS and a product tag containing etc in pecan.xml "],["download-gfdl.html", "15.15 Download GFDL", " 15.15 Download GFDL The Downlad.GFDL function assimilates 3 hour frequency CMIP5 outputs generated by multiple GFDL models. GFDL developed several distinct modeling streams on the timescale of CMIP5 and AR5. These models include CM3, ESM2M and ESM2G with a spatial resolution of 2 degrees latitude by 2.5 degrees longitude. Each model has future outputs for the AR5 Representative Concentration Pathways ranging from 2006-2100. "],["cm3.html", "15.16 CM3", " 15.16 CM3 GFDL’s CMIP5 experiments with CM3 included many of the integrations found in the long-term CMIP5 experimental design. The focus of this physical climate model is on the role of aerosols, aerosol-cloud interactions, and atmospheric chemistry in climate variability and climate change. "],["esm2m-esm2g.html", "15.17 ESM2M & ESM2G", " 15.17 ESM2M & ESM2G Two new models representing ocean physics with alternative numerical frameworks to explore the implications of some of the fundamental assumptions embedded in these models. Both ESM2M and ESM2G utilize a more advanced land model, LM3, than was available in ESM2.1 including a variety of enhancements (Milly et al., in prep). GFDL’s CMIP5 experiments with Earth System Models included many of the integrations found in the long-term CMIP5 experimental design. The ESMs, by design, close the carbon cycle and are used to study the impact of climate change on ecosystems, ecosystem changes on climate and human activities on ecosystems. For more information please navigate here CM# ESM2M ESM2G rcp26 r1i1p1 r1i1p1 rcp45 r1i1p1, r3i1p1,r5i1p1 r1i1p1 r1i1p1 rcp60 r1i1p1 r1i1p1 rcp85 r1i1p1 r1i1p1 r1i1p1 "],["pecan-project-api.html", "16 PEcAn Project API ", " 16 PEcAn Project API "],["introduction-4.html", "16.1 Introduction", " 16.1 Introduction Welcome to the PEcAn Project API Documentation. The Predictive Ecosystem Analyser (PEcAn) Project is an open source framework initiated to meet the demands for more accessible, transparent & repeatable modeling of ecosystems. PEcAn can be considered as an ecoinformatics toolbox combined with a set of workflows that wrap around ecosystem models that allow users to effectively perform data synthesis, propagation of uncertainty through a model & ecological predictions in an integrated fashion using a diverse repository of data & models. Our API allows users to remotely interact with the PEcAn servers and leverage the functionalities provided by the PEcAn Project. It has been designed to follow common RESTful API conventions. Most operations are performed using the HTTP methods: GET (retrieve) & POST (create). Please note that the PEcAn Project API is currently under active development and is possible that any information in this document is subject to change. "],["authentication.html", "16.2 Authentication", " 16.2 Authentication Authentication to the PEcAn API occurs via Basic HTTP Auth. The credentials for using the API are the same as those used to log into PEcAn & BetyDB. Here is how you use basic HTTP auth with curl: $ curl --user '<username>:<password>' <api-endpoint> Authentication also depends on the PEcAn server that the user interacts with. Some servers, at the time of deployment have the AUTH_REQ = FALSE, meaning that such servers do not require user autertication for the usage of the PEcAn APIs. Regardless of the type of server, the endpoints defind under General section can be accessed without any authentication. "],["restful-api-endpoints.html", "16.3 RESTful API Endpoints", " 16.3 RESTful API Endpoints This page contains the high-level overviews & the functionalities offered by the different RESTful endpoints of the PEcAn API. For the most up-to-date documentation, you can visit the PEcAn API Documentation. The currently implemented functionalities include: General: GET /api/ping: Ping the server to check if it is live GET /api/status: Obtain general information about PEcAn & the details of the database host Models: GET /api/models/: Search for model(s) using search pattern based on model name & revision GET /api/models/{model_id}: Fetch the details of specific model Sites: GET /api/sites/: Search for site(s) using search pattern based on site name GET /api/sites/{site_id}: Fetch the details of specific site PFTs: GET /api/pfts/: Search for PFT(s) using search pattern based on PFT name, PFT type & Model type GET /api/pfts/{pft_id}: Fetch the details of specific PFT Formats: GET /api/formats/: Search for format(s) using search pattern based on format name & mime type GET /api/formats/{format_id}: Fetch the details of specific Format Inputs: GET /api/inputs/: Search for inputs needed for a PEcAn workflow based on model_id, site_id, format_id & host_id. GET /api/inputs/{input_id} *: Download the desired input file (if the input_id corresponds to a folder, an optional filename argument must be provided) Workflows: GET /api/workflows/: Retrieve a list of PEcAn workflows POST /api/workflows/: Submit a new PEcAn workflow GET /api/workflows/{id}: Obtain the details of a particular PEcAn workflow by supplying its ID GET /api/workflows/{id}/status: Obtain the status of a particular PEcAn workflow GET /api/workflows/{id}/file/{filename}: Download the desired file from a PEcAn workflow Runs: GET /api/runs: Get the list of all the runs GET /api/runs/{run_id}: Fetch the details of a specified PEcAn run GET /api/runs/{run_id}/input/{filename}: Download the desired input file for a run GET /api/runs/{run_id}/output/{filename}: Download the desired output file for a run GET /api/runs/{run_id}/graph/{year}/{y_var}: Plot the graph of desired output variables for a run * indicates that the particular API is under development & may not be ready for use "],["examples.html", "16.4 Examples:", " 16.4 Examples: 16.4.1 Prerequisites to interact with the PEcAn API Server 16.4.1.1 R Packages httr jsonlite xml2 16.4.1.2 Python Packages requests json xml Following are some example snippets to call the PEcAn API endpoints: 16.4.2 GET /api/ping 16.4.2.1 R Snippet res <- httr::GET("http://pecan.localhost/api/ping") print(jsonlite::fromJSON(rawToChar(res$content))) ## $request ## [1] "ping" ## $response ## [1] "pong" 16.4.2.2 Python Snippet response = requests.get("http://pecan.localhost/api/ping") print(json.dumps(response.json(), indent=2)) ## { ## "request": "ping", ## "response": "pong" ## } 16.4.3 GET /api/status 16.4.3.1 R Snippet res <- httr::GET("http://pecan.localhost/api/status") print(jsonlite::fromJSON(rawToChar(res$content))) ## $pecan_details$version ## [1] "1.7.0" ## $pecan_details$branch ## [1] "develop" ## $pecan_details$gitsha1 ## [1] "unknown" ## $host_details$hostid ## [1] 99 ## $host_details$hostname ## [1] "" ## $host_details$start ## [1] 99000000000 ## $host_details$end ## [1] 99999999999 ## $host_details$sync_url ## [1] "" ## $host_details$sync_contact ## [1] "" 16.4.3.2 Python Snippet response = requests.get("http://pecan.localhost/api/status") print(json.dumps(response.json(), indent=2)) ## { ## "pecan_details": { ## "version": "1.7.0", ## "branch": "develop", ## "gitsha1": "unknown" ## }, ## "host_details": { ## "hostid": 99, ## "hostname": "", ## "start": 99000000000, ## "end": 99999999999, ## "sync_url": "", ## "sync_contact": "" ## } ## } 16.4.4 GET /api/models/ 16.4.4.1 R Snippet # Search model(s) with `model_name` containing "sip" & `revision` containing "ssr" res <- httr::GET( "http://pecan.localhost/api/models/?model_name=sip&revision=ssr&ignore_case=TRUE", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $models ## model_id model_name revision ## 1 1000000022 SIPNET ssr ## $count ## [1] 1 16.4.4.2 Python Snippet # Search model(s) with `model_name` containing "sip" & `revision` containing "ssr" response = requests.get( "http://pecan.localhost/api/models/?model_name=sip&revision=ssr&ignore_case=TRUE", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "models": [ ## { ## "model_id": "1000000022", ## "model_name": "SIPNET", ## "revision": "ssr" ## } ## ], ## "count": 1 ## } 16.4.5 GET /api/models/{model_id} 16.4.5.1 R Snippet # Fetch the details of PEcAn model with id = 1000000022 res <- httr::GET( "http://pecan.localhost/api/models/1000000022", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $modeltype_id ## [1] 3 ## $model_type ## [1] "SIPNET" ## $model_id ## [1] 1000000022 ## $model_name ## [1] "SIPNET" ## $revision ## [1] "ssr" ## $inputs ## input required ## 1 met TRUE ## 2 poolinitcond FALSE 16.4.5.2 Python Snippet # Fetch the details of PEcAn model with id = 1000000022 response = requests.get( "http://pecan.localhost/api/models/1000000022", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "model_id": "1000000022", ## "model_name": "SIPNET", ## "revision": "ssr", ## "modeltype_id": 3, ## "model_type": "SIPNET" ## "inputs": [ ## { ## "input": "met", ## "required": TRUE ## }, ## { ## "input": "poolinitcond", ## "required": FALSE ## } ## ] ## } 16.4.6 GET /api/sites/ 16.4.6.1 R Snippet # Search site(s) with `site_name` containing "willow" res <- httr::GET( "http://pecan.localhost/api/sites/?sitename=willow&ignore_case=TRUE", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $sites ## id sitename ## 1 676 Willow Creek (US-WCr) ## 2 1108 Willow Creek (WC)-Chequamegon National Forest ## 3 1202 Tully_willow ## 4 1223 Saare SRF willow plantation ## 5 1000005151 Willow Creek (US-WCr) ## $count ## [1] 5 16.4.6.2 Python Snippet # Search site(s) with `site_name` containing "willow" response = requests.get( "http://pecan.localhost/api/models/?sitename=willow&ignore_case=TRUE", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "sites": [ ## { ## "id": 676, ## "sitename": "Willow Creek (US-WCr)" ## }, ## { ## "id": 1108, ## "sitename": "Willow Creek (WC)-Chequamegon National Forest" ## }, ## { ## "id": 1202, ## "sitename": "Tully_willow" ## }, ## { ## "id": 1223, ## "sitename": "Saare SRF willow plantation" ## }, ## { ## "id": 1000005151, ## "sitename": "Willow Creek (US-WCr)" ## } ## ], ## "count": 5 ## } 16.4.7 GET /api/sites/{site_id} 16.4.7.1 R Snippet # Fetch the details of PEcAn site with id = 676 res <- httr::GET( "http://pecan.localhost/api/sites/676", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $id ## [1] 676 ## $city ## [1] "Park Falls Ranger District" ## $state ## [1] "Wisconsin" ## $country ## [1] "US" ## $mat ## [1] 4 ## $map ## [1] 815 ## $soil ## [1] "" ## $som ## [1] "NA" ## $notes ## [1] "MF" ## $soilnotes ## [1] "" ## $sitename ## [1] "Willow Creek (US-WCr)" ## $greenhouse [1] FALSE ## $sand_pct ## [1] 42.52 ## $clay_pct ## [1] 20.17 ## $time_zone ## [1] "America/Chicago" 16.4.7.2 Python Snippet # Fetch the details of PEcAn site with id = 676 response = requests.get( "http://pecan.localhost/api/sites/676", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "id": 676, ## "city": "Park Falls Ranger District", ## "state": "Wisconsin", ## "country": "US", ## "mat": 4, ## "map": 815, ## "soil": "", ## "notes": "MF", ## "soilnotes": "", ## "sitename": "Willow Creek (US-WCr)", ## "greenhouse": false, ## "sand_pct": 42.52, ## "clay_pct": 20.17, ## "time_zone": "America/Chicago" ## } 16.4.8 GET /api/pfts/ 16.4.8.1 R Snippet # Search pft(s) of "plant" type with `pft_name` containing "temperate" & belonging to `model_type` "SIPNET" res <- httr::GET( "http://pecan.localhost/api/pfts/?pft_name=temperate&pft_type=plant&model_type=sipnet&ignore_case=TRUE", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $pfts ## model_type pft_id pft_name pft_type ## <chr> <S3: integer64> <chr> <chr> ## 1 SIPNET 41 temperate.deciduous plant ## 2 SIPNET 1000000105 temperate.deciduous.IF plant ## 3 SIPNET 1000000107 temperate.deciduous_SDA plant ## 4 SIPNET 1000000115 temperate.deciduous.ALL plant ## 5 SIPNET 1000000118 temperate.deciduous.ALL.NORMAL plant ## 6 SIPNET 2000000017 tundra.deciduous.NGEE_Arctic plant ## 7 SIPNET 2000000045 temperate.broadleaf.deciduous plant ## $count ## [1] 7 16.4.8.2 Python Snippet # Search pft(s) of "plant" type with `pft_name` containing "temperate" & belonging to `model_type` "SIPNET" response = requests.get( "http://pecan.localhost/api/pfts/?pft_name=temperate&pft_type=plant&model_type=sipnet&ignore_case=TRUE", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "pfts": [ ## { ## "model_type": "SIPNET", ## "pft_id": 41, ## "pft_name": "temperate.deciduous", ## "pft_type": "plant" ## }, ## ... ## ], ## "count": 7 ## } 16.4.9 GET /api/pfts/{pft_id} 16.4.9.1 R Snippet # Fetch the details of PEcAn PFT with id = 2000000045 res <- httr::GET( "http://pecan.localhost/api/pfts/2000000045", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $model_type ## [1] "SIPNET" ## $pft_id ## [1] 2000000045 ## $pft_name ## [1] "temperate.broadleaf.deciduous" ## $definition ## [1] "SIPNET Temperate Deciduous PFT with priors on all parameters" ## $pft_type ## [1] "plant" 16.4.9.2 Python Snippet # Fetch the details of PEcAn PFT with id = 2000000045 response = requests.get( "http://pecan.localhost/api/pfts/2000000045", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "model_type": "SIPNET", ## "pft_id": 2000000045, ## "pft_name": "temperate.broadleaf.deciduous", ## "definition": "SIPNET Temperate Deciduous PFT with priors on all parameters", ## "pft_type": "plant" ## } 16.4.10 GET /api/formats/ 16.4.10.1 R Snippet # Search format(s) with name containing 'ameriflux' & mime type containing 'csv' res <- httr::GET( "http://pecan.localhost/api/formats/?format_name=ameriflux&mimetype=csv&ignore_case=TRUE", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $formats ## mimetype format_id format_name ## 1 text/csv 19 AmeriFlux.level4.h ## 2 text/csv 20 AmeriFlux.level4.d ## 3 text/csv 21 AmeriFlux.level4.w ## 4 text/csv 22 AmeriFlux.level4.m ## 5 text/csv 35 AmeriFlux.level2.h ## 6 text/csv 36 AmeriFlux.level3.h ## $count ## [1] 6 16.4.10.2 Python Snippet # # Search format(s) with name containing 'ameriflux' & mime type containing 'csv' response = requests.get( "http://pecan.localhost/api/formats/?format_name=ameriflux&mimetype=csv&ignore_case=TRUE", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "formats": [ ## { ## "mimetype": "text/csv", ## "format_id": 19, ## "format_name": "AmeriFlux.level4.h" ## }, ## { ## "mimetype": "text/csv", ## "format_id": 20, ## "format_name": "AmeriFlux.level4.d" ## }, ## ... ## ], ## "count": 6 ## } 16.4.11 GET /api/formats/{format_id} 16.4.11.1 R Snippet # Fetch the details of PEcAn format with id = 19 res <- httr::GET( "http://pecan.localhost/api/formats/19", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $mimetype ## [1] "text/csv" ## $format_id ## [1] 19 ## $name ## [1] "AmeriFlux.level4.h" ## $notes ## [1] "Half-hourly AmeriFlux level 4 gap filled, partitioned, and flagged flux tower data. Variables description: Level 4 data are obtained from the level 3 products, data are ustar filtered, gap-filled using different methods (ANN and MDS) and partitioned (i.e. NEE, GPP, and Re). Flags with information regarding quality of the original and gapfilled data are added. Missing values: -9999." ## $header ## [1] "" ## $format_variables ## description name unit ## 1 Latent heat flux LE_f W m-2 ## 2 Sensible heat flux H_f W m-2 ## 3 air temperature Ta_f degrees C ## 4 Vapor Pressure Deficit VPD_f Pa ## 5 Cumulative ecosystem respiration over a specified time step Reco umol C02 m-2 s-1 ## 6 Net ecosystem exchange NEE_st_fMDS umol C m-2 s-1 16.4.11.2 Python Snippet # Fetch the details of PEcAn format with id = 19 response = requests.get( "http://pecan.localhost/api/formats/19", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "mimetype": "text/csv", ## "format_id": 19, ## "name": "AmeriFlux.level4.h", ## "notes": "Half-hourly AmeriFlux level 4 gap filled, partitioned, and flagged flux tower data. Variables description: ## Level 4 data are obtained from the level 3 products, data are ustar filtered, gap-filled using different methods (ANN and ## MDS) and partitioned (i.e. NEE, GPP, and Re). Flags with information regarding quality of the original and gapfilled data ## are added. Missing values: -9999.", ## "header": "", ## "format_variables": [ ## { ## "description": "Latent heat flux", ## "name": "LE_f", ## "unit": "W m-2" ## }, ## ... ## ] ## } 16.4.12 GET /api/inputs/ 16.4.12.1 R Snippet # Get the inputs needed for a workflow with model_id = 1000000022 & site_id = 676 res <- httr::GET( "http://pecan.localhost/api/inputs/?model_id=1000000022&site_id=676", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $inputs ## sitename model_name revision tag hostname file_name format_name mimetype ## 1 Willow Creek (US-WCr) SIPNET ssr met ebi-forecast.igb.illinois.edu wcr.clim Sipnet.climna text/csv ## 2 Willow Creek (US-WCr) SIPNET ssr met docker wcr.clim Sipnet.climna text/csv ## file_path id input_name start_date end_date ## 1 /home/share/data/dbfiles/ 235 2000-01-01 06:00:00 2007-01-01 05:59:00 ## 2 /data/sites/willow 2000000001 1998-01-01 00:00:00 2006-12-31 00:00:00 ## $count ## [1] 2 # Get the inputs needed for a workflow with format_id = 5000000002 (AMERIFLUX_BASE_HH) & host_id = 99000000001 (docker) res <- httr::GET( "http://pecan.localhost/api/inputs/?format_id=5000000002&host_id=99000000001", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $inputs ## sitename mimetype format_name hostname file_name ## 1 Niwot Ridge Forest/LTER NWT1 (US-NR1) text/csv AMERIFLUX_BASE_HH docker AMF_US-NR1_BASE_HH_15-5 ## file_path id input_name start_date end_date ## 1 /data/dbfiles/AmerifluxLBL_site_0-772 1000011238 AmerifluxLBL_site_0-772 1998-01-01 2016-12-31 ## $count ## [1] 1 16.4.12.2 Python Snippet # Get the inputs needed for a workflow with model_id = 1000000022 & site_id = 676 response = requests.get( "http://pecan.localhost/api/inputs/?model_id=1000000022&site_id=676", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "inputs": [ ## { ## "sitename": "Willow Creek (US-WCr)", ## "model_name": "SIPNET", ## "revision": "ssr", ## "tag": "met", ## "hostname": "ebi-forecast.igb.illinois.edu", ## "file_name": "wcr.clim", ## "format_name": "Sipnet.climna", ## "mimetype": "text/csv", ## "file_path": "/home/share/data/dbfiles/", ## "id": 235, ## "input_name": "", ## "start_date": "2000-01-01 06:00:00", ## "end_date": "2007-01-01 05:59:00" ## }, ## ... ## ], ## "count": 2 ## } # Get the inputs needed for a workflow with format_id = 5000000002 (AMERIFLUX_BASE_HH) & host_id = 99000000001 (docker) response = requests.get( "http://pecan.localhost/api/inputs/?format_id=5000000002&host_id=99000000001", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "inputs": [ ## { ## "sitename": "Niwot Ridge Forest/LTER NWT1 (US-NR1)", ## "hostname": "docker", ## "file_name": "AMF_US-NR1_BASE_HH_15-5", ## "format_name": "AMERIFLUX_BASE_HH", ## "mimetype": "text/csv", ## "file_path": "/data/dbfiles/AmerifluxLBL_site_0-772", ## "id": 1000011238, ## "input_name": "AmerifluxLBL_site_0-772", ## "start_date": "1998-01-01", ## "end_date": "2016-12-31" ## } ## ], ## "count": 1, ## } 16.4.13 GET /api/inputs/{input_id} 16.4.13.1 R Snippet # Download the input file with id = 99000000003 res <- httr::GET( "http://pecan.localhost/api/inputs/99000000003", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.2002.nc") # Download the file 'fraction.plantation' from the input directory with id = 295 res <- httr::GET( "http://pecan.localhost/api/inputs/295?filename=fraction.plantation", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.fraction.plantation") 16.4.13.2 Python Snippet # Download the input file for with id = 99000000003 response = requests.get( "http://pecan.localhost/api/inputs/99000000003", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.2002.nc", "wb") as file: file.write(response.content) # Download the file 'fraction.plantation' from the input directory with id = 295 response = requests.get( "http://pecan.localhost/api/inputs/295?filename=fraction.plantation", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.2002.nc", "wb") as file: file.write(response.content) 16.4.14 GET /api/workflows/ 16.4.14.1 R Snippet # Get workflow(s) that use `model_id` = 1000000022 [SIPNET] & `site_id` = 676 [Willow Creek (US-WCr)] res <- httr::GET( "http://pecan.localhost/api/workflows/?model_id=1000000022&site_id=676", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $workflows ## id folder started_at site_id model_id hostname start_date end_date ## 1 1000009900 /fs/data2/output//PEcAn_1000009900 2018-11-09 08:56:37 676 1000000022 geo.bu.edu 2004-01-01 2004-12-31 ## 2 1000009172 /fs/data2/output//PEcAn_1000009172 2018-04-11 18:14:52 676 1000000022 test-pecan.bu.edu 2004-01-01 2004-12-31 ## ... ## $count ## [1] 5 16.4.14.2 Python Snippet # Get workflow(s) that use `model_id` = 1000000022 [SIPNET] & `site_id` = 676 [Willow Creek (US-WCr)] response = requests.get( "http://pecan.localhost/api/workflows/?model_id=1000000022&site_id=676", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "workflows": [ ## { ## "id": 1000009172, ## "folder": "/fs/data2/output//PEcAn_1000009900", ## "started_at": "2018-11-09 08:56:37", ## "site_id": 676, ## "model_id": 1000000022, ## "hostname": "geo.bu.edu", ## "start_date": "2004-01-01", ## "end_date": "2004-12-31" ## }, ## ... ## ], ## "count": 5 ## } 16.4.15 POST /api/workflows/ 16.4.15.1 R Snippet # Submit a workflow in XML format for execution xmlFile <- "pecan/tests/api.sipnet.xml" xml_string <- paste0(xml2::read_xml(xmlFile)) res <- httr::POST( "http://pecan.localhost/api/workflows/", httr::authenticate("carya", "illinois"), httr::content_type("application/xml"), body = xml_string ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $workflow_id ## [1] 99000000001 ## $status ## [1] "Submitted successfully" 16.4.15.2 Python Snippet # Submit a workflow in XML format for execution xml_file = "pecan/tests/api.sipnet.xml" root = xml.etree.ElementTree.parse(xml_file).getroot() response = requests.post( "http://pecan.localhost/api/workflows/", auth=HTTPBasicAuth('carya', 'illinois'), headers = {'Content-Type': 'application/xml'}, data = xml.etree.ElementTree.tostring(root, encoding='unicode', method='xml') ) print(json.dumps(response.json(), indent=2)) ## { ## "workflow_id": "99000000001", ## "status": "Submitted successfully" ## } 16.4.16 GET /api/workflows/{id} 16.4.16.1 R Snippet # Get details of workflow with `id` = '1000009172' res <- httr::GET( "http://pecan.localhost/api/workflows/1000009172", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $id ## [1] "1000009172" ## $folder ## [1] "/fs/data2/output//PEcAn_1000009172" ## $hostname ## [1] "test-pecan.bu.edu" ## $user_id ## [1] "NA" ## $properties ## $properties$end ## [1] "2004/12/31" ## $properties$pft ## $properties$pft[[1]] ## [1] "soil.IF" ## $properties$pft[[2]] ## [1] "temperate.deciduous.IF" ## $properties$email ## [1] "" ## $properties$notes ## [1] "" ## $properties$start ## [1] "2004/01/01" ## $properties$siteid ## [1] "676" ## $properties$modelid ## [1] "1000000022" ## $properties$hostname ## [1] "test-pecan.bu.edu" ## $properties$sitename ## [1] "WillowCreek(US-WCr)" ## $properties$input_met ## [1] "AmerifluxLBL.SIPNET" ## $properties$pecan_edit ## [1] "on" ## $properties$sitegroupid ## [1] "1000000022" ## $properties$fluxusername ## [1] "pecan" ## $properties$input_poolinitcond ## [1] "-1" 16.4.16.2 Python Snippet # Get details of workflow with `id` = '1000009172' response = requests.get( "http://pecan.localhost/api/workflows/1000009172", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "id": "1000009172", ## "folder": "/fs/data2/output//PEcAn_1000009172", ## "hostname": "test-pecan.bu.edu", ## "user_id": "NA", ## "properties": { ## "end": "2004/12/31", ## "pft": [ ## "soil.IF", ## "temperate.deciduous.IF" ## ], ## "email": "", ## "notes": "", ## "start": "2004/01/01", ## "siteid": "676", ## "modelid": "1000000022", ## "hostname": "test-pecan.bu.edu", ## "sitename": "WillowCreek(US-WCr)", ## "input_met": "AmerifluxLBL.SIPNET", ## "pecan_edit": "on", ## "sitegroupid": "1000000022", ## "fluxusername": "pecan", ## "input_poolinitcond": "-1" ## } ## } 16.4.17 GET /api/workflows/{id}/status 16.4.17.1 R Snippet # Get list of run belonging to the workflow with `workflow_id` = '99000000001' res <- httr::GET( "http://pecan.localhost/api/workflows/99000000001/status", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $workflow_id ## [1] "99000000001" ## $status ## [1] "TRAIT 2020-07-22 07:02:33 2020-07-22 07:02:35 DONE " ## [2] "META 2020-07-22 07:02:35 2020-07-22 07:02:38 DONE " ## [3] "CONFIG 2020-07-22 07:02:38 2020-07-22 07:02:40 DONE " ## [4] "MODEL 2020-07-22 07:02:40 2020-07-22 07:04:07 DONE " ## [5] "OUTPUT 2020-07-22 07:04:07 2020-07-22 07:04:08 DONE " ## [6] "ENSEMBLE 2020-07-22 07:04:08 2020-07-22 07:04:09 DONE " ## [7] "SENSITIVITY 2020-07-22 07:04:09 2020-07-22 07:04:16 DONE " ## [8] "FINISHED 2020-07-22 07:04:16 2020-07-22 07:04:16 DONE " 16.4.17.2 Python Snippet # Get list of run belonging to the workflow with `workflow_id` = '99000000001' response = requests.get( "http://pecan.localhost/api/workflows/99000000001/status", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "workflow_id": "99000000001", ## "status": [ ## "TRAIT 2020-07-22 07:02:33 2020-07-22 07:02:35 DONE ", ## "META 2020-07-22 07:02:35 2020-07-22 07:02:38 DONE ", ## "CONFIG 2020-07-22 07:02:38 2020-07-22 07:02:40 DONE ", ## "MODEL 2020-07-22 07:02:40 2020-07-22 07:04:07 DONE ", ## "OUTPUT 2020-07-22 07:04:07 2020-07-22 07:04:08 DONE ", ## "ENSEMBLE 2020-07-22 07:04:08 2020-07-22 07:04:09 DONE ", ## "SENSITIVITY 2020-07-22 07:04:09 2020-07-22 07:04:16 DONE ", ## "FINISHED 2020-07-22 07:04:16 2020-07-22 07:04:16 DONE " ## ] ## } 16.4.18 GET /api/workflows/{id}/file/{filename} 16.4.18.1 R Snippet # Download the 'ensemble.ts.99000000017.NPP.2002.2002.Rdata' output file for the workflow with id = 99000000031 res <- httr::GET( "http://pecan.localhost/api/workflows/99000000031/file/ensemble.ts.99000000017.NPP.2002.2002.Rdata", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.ensemble.ts.99000000017.NPP.2002.2002.Rdata") 16.4.18.2 Python Snippet # Download the 'ensemble.ts.99000000017.NPP.2002.2002.Rdata' output file for the workflow with id = 99000000031 response = requests.get( "http://pecan.localhost/api/workflows/99000000031/file/ensemble.ts.99000000017.NPP.2002.2002.Rdata", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.ensemble.ts.99000000017.NPP.2002.2002.Rdata", "wb") as file: file.write(response.content) 16.4.19 GET /api/runs/ 16.4.19.1 R Snippet # Get list of run belonging to the workflow with `workflow_id` = '1000009172' res <- httr::GET( "http://pecan.localhost/api/runs/?workflow_id=1000009172", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $runs ## runtype ensemble_id workflow_id id model_id site_id parameter_list start_time ## finish_time ## 1 ensemble 1000017624 1000009172 1002042201 1000000022 796 ensemble=1 2005-01-01 ## 00:00:00 2011-12-31 00:00:00 ## ... ## $count ## [1] 50 16.4.19.2 Python Snippet # Get list of run belonging to the workflow with `workflow_id` = '1000009172' response = requests.get( "http://pecan.localhost/api/runs/?workflow_id=1000009172", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "runs": [ ## { ## "runtype": "ensemble", ## "ensemble_id": 1000017624, ## "workflow_id": 1000009172, ## "id": 1002042201, ## "model_id": 1000000022, ## "site_id": 796, ## "parameter_list": "ensemble=1", ## "start_time": "2005-01-01", ## "finish_time": "2011-12-31" ## }, ## ... ## ] ## "count": 50, ## "next_page": "http://pecan.localhost/api/workflows/?workflow_id=1000009172&offset=50&limit=50" ## } 16.4.20 GET /api/runs/{run_id} 16.4.20.1 R Snippet # Get details of run belonging with `id` = '99000000282' res <- httr::GET( "http://pecan.localhost/api/runs/99000000282", httr::authenticate("carya", "illinois") ) print(jsonlite::fromJSON(rawToChar(res$content))) ## $runtype ## [1] "sensitivity analysis" ## $ensemble_id ## [1] 99000000016 ## $workflow_id ## [1] 99000000031 ## $id ## [1] 99000000282 ## $model_id ## [1] 1000000014 ## $site_id ## [1] 772 ## $start_time ## [1] "2002-01-01" ## $finish_time ## [1] "2002-12-31" ## $parameter_list ## [1] "quantile=MEDIAN,trait=all,pft=temperate.coniferous" ## $started_at ## [1] "2020-07-22 07:02:40" ## $finished_at ## [1] "2020-07-22 07:02:57" ## $inputs ## $inputs$info ## [1] "README.txt" ## $inputs$others ## [1] "sipnet.clim" "sipnet.in" "sipnet.param" "sipnet.param-spatial" ## $outputs ## $outputs$logfile ## [1] "logfile.txt" ## $outputs$info ## [1] "README.txt" ## $outputs$years ## $outputs$years$`2002` ## $outputs$years$`2002`$data ## [1] "2002.nc" ## $outputs$years$`2002`$variables ## $outputs$years$`2002`$variables$GPP ## [1] "Gross Primary Productivity" ## $outputs$years$`2002`$variables$NPP ## [1] "Net Primary Productivity" ## ... 16.4.20.2 Python Snippet # Get details of run with `id` = '1002042201' response = requests.get( "http://pecan.localhost/api/runs/1002042201", auth=HTTPBasicAuth('carya', 'illinois') ) print(json.dumps(response.json(), indent=2)) ## { ## "runtype": "ensemble", ## "ensemble_id": 1000017624, ## "workflow_id": 1000009172, ## "id": 1002042201, ## "model_id": 1000000022, ## "site_id": 796, ## "parameter_list": "ensemble=1", ## "start_time": "2005-01-01", ## "finish_time": "2011-12-31", ## "inputs": { ## "info": "README.txt", ## "others": [ ## "sipnet.clim", ## "sipnet.in", ## "sipnet.param", ## "sipnet.param-spatial" ## ] ## } ## "outputs": { ## "logfile": "logfile.txt", ## "info": "README.txt", ## "years": { ## "2002": { ## "data": "2002.nc", ## "variables": { ## "GPP": "Gross Primary Productivity", ## "NPP": "Net Primary Productivity", ## ... ## } ## } ## } ## } ## } 16.4.21 GET /api/runs/{run_id}/input/{filename} 16.4.21.1 R Snippet # Download the 'sipnet.in' input file for the run with id = 99000000282 res <- httr::GET( "http://pecan.localhost/api/runs/99000000282/input/sipnet.in", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.sipnet.in") 16.4.21.2 Python Snippet # Download the 'sipnet.in' input file for the run with id = 99000000282 response = requests.get( "http://pecan.localhost/api/runs/99000000282/input/sipnet.in", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.sipnet.in", "wb") as file: file.write(response.content) 16.4.22 GET /api/runs/{run_id}/output/{filename} 16.4.22.1 R Snippet # Download the '2002.nc' output file for the run with id = 99000000282 res <- httr::GET( "http://pecan.localhost/api/runs/99000000282/output/2002.nc", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.2002.nc") 16.4.22.2 Python Snippet # Download the '2002.nc' output file for the run with id = 99000000282 response = requests.get( "http://pecan.localhost/api/runs/99000000282/output/2002.nc", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.2002.nc", "wb") as file: file.write(response.content) 16.4.23 GET /api/runs/{run_id}/graph/{year}/{y_var} 16.4.23.1 R Snippet # Plot the Gross Primary Productivity vs Time for the run with ID `99000000282` for the year 2002 res <- httr::GET( "http://pecan.localhost/api/runs/99000000282/graph/2002/GPP", httr::authenticate("carya", "illinois") ) writeBin(res$content, "test.png") 16.4.23.2 Python Snippet # Plot the Gross Primary Productivity vs Time for the run with ID `99000000282` for the year 2002 response = requests.get( "http://pecan.localhost/api/runs/99000000282/graph/2002/GPP", auth=HTTPBasicAuth('carya', 'illinois') ) with open("test.png", "wb") as file: file.write(response.content) "],["database-sync.html", "17 Database synchronization", " 17 Database synchronization The database synchronization consists of 2 parts: - Getting the data from the remote servers to your server - Sharing your data with everybody else "],["how-does-it-work.html", "17.1 How does it work?", " 17.1 How does it work? Each server that runs the BETY database will have a unique machine_id and a sequence of ID’s associated. Whenever the user creates a new row in BETY it will receive an ID in the sequence. This allows us to uniquely identify where a row came from. This is information is crucial for the code that works with the synchronization since we can now copy those rows that have an ID in the sequence specified. If you have not asked for a unique ID your ID will be 99. The synchronization code itself is split into two parts, loading data with the load.bety.sh script and exporting data using dump.bety.sh. If you do not plan to share data, you only need to use load.bety.sh to update your database. "],["set-up.html", "17.2 Set up", " 17.2 Set up Requests for new machine ID’s is currently handled manually. To request a machine ID contact Rob Kooper kooper@illinois.edu. In the examples below this ID is referred to as ‘my siteid’. To setup the database to use this ID you need to call load.bety in ‘CREATE’ mode (replacing with the ID if your site) sudo -u postgres {$PECAN}/scripts/load.bety.sh -c -u -m <my siteid> WARNING: At the moment running CREATE deletes all current records in the database. If you are running from the VM this includes both all runs you have done and all information that the database is prepopulated with (e.g. input and model records). Remote records can be fetched (see below), but local records will be lost (we’re working on improving this!) "],["fetch-latest-data.html", "17.3 Fetch latest data", " 17.3 Fetch latest data When logged into the machine you can fetch the latest data using the load.bety.sh script. The script will check what site you want to get the data for and will remove all data in the database associated with that id. It will then reinsert all the data from the remote database. The script is configured using environment variables. The following variables are recognized: - DATABASE: the database where the script should write the results. The default is bety. - OWNER: the owner of the database (if it is to be created). The default is bety. - PG_OPT: additional options to be added to psql (default is nothing). - MYSITE: the (numerical) ID of your site. If you have not requested an ID, use 99; this is used for all sites that do not want to share their data (i.e. VM). 99 is in fact the default. - REMOTESITE: the ID of the site you want to fetch the data from. The default is 0 (EBI). - CREATE: If ‘YES’, this indicates that the existing database (bety, or the one specified by DATABASE) should be removed. Set to YES (in caps) to remove the database. THIS WILL REMOVE ALL DATA in DATABASE. The default is NO. - KEEPTMP: indicates whether the downloaded file should be preserved. Set to YES (in caps) to keep downloaded files; the default is NO. - USERS: determines if default users should be created. Set to YES (in caps) to create default users with default passwords. The default is NO. All of these variables can be specified as command line arguments as well, to see the options use -h. load.bety.sh -h ./scripts/load.bety.sh [-c YES|NO] [-d database] [-h] [-m my siteid] [-o owner] [-p psql options] [-r remote siteid] [-t YES|NO] [-u YES|NO] -c create database, THIS WILL ERASE THE CURRENT DATABASE, default is NO -d database, default is bety -h this help page -m site id, default is 99 (VM) -o owner of the database, default is bety -p additional psql command line options, default is empty -r remote site id, default is 0 (EBI) -t keep temp folder, default is NO -u create carya users, this will create some default users dump.bety.sh -h ./scripts/dump.bety.sh [-a YES|NO] [-d database] [-h] [-l 0,1,2,3,4] [-m my siteid] [-o folder] [-p psql options] [-u YES|NO] -a use anonymous user, default is YES -d database, default is bety -h this help page -l level of data that can be dumped, default is 3 -m site id, default is 99 (VM) -o output folder where dumped data is written, default is dump -p additional psql command line options, default is -U bety -u should unchecked data be dumped, default is NO "],["sharing-data.html", "17.4 Sharing data", " 17.4 Sharing data Sharing your data requires a few steps. First, before entering any data, you will need to request an ID from the PEcAn developers. Simply open an issue at github and we will generate an ID for you. If possible, add the URL of your data host. You will now need to synchronize the database again and use your ID. For example if you are given ID=42 you can use the following command: MYID=42 REMOTEID=0 ./scripts/load.bety.sh. This will load the EBI database and set the ID’s such that any data you insert will have the right ID. To share your data you can now run the dump.bey.sh. The script is configured using environment variables, the following variables are recognized: - DATABASE: the database where the script should write the results. The default is bety. - PG_OPT: additional options to be added to psql (default is nothing). - MYSITE: the ID of your site. If you have not requested an ID, use 99, which is used for all sites that do not want to share their data (i.e. VM). 99 is the default. - LEVEL: the minimum access-protection level of the data to be dumped (0=private, 1=restricted, 2=internal collaborators, 3=external collaborators, 4=public). The default level for exported data is level 3. - note that currently only the traits and yields tables have restrictions on sharing. If you share data, records from other (meta-data) tables will be shared. If you wish to extend the access_level to other tables please submit a feature request. - UNCHECKED: specifies whether unchecked traits and yields be dumped. Set to YES (all caps) to dump unchecked data. The default is NO. - ANONYMOUS: specifies whether all users be anonymized. Set to YES (all caps) to keep the original users (INCLUDING PASSWORD) in the dump file. The default is NO. - OUTPUT: the location of where on disk to write the result file. The default is ${PWD}/dump. NOTE: If you want your dumps to be accessible to other PEcAn servers you need to perform the following additional steps Open pecan/scripts/load.bety.sh In the DUMPURL section of the code add a new record indicating where you are dumping your data. Below is the example for SITE number 1 (Boston University) elif [ "${REMOTESITE}" == "1" ]; then DUMPURL="http://psql-pecan.bu.edu/sync/dump/bety.tar.gz" Check your Apache settings to make sure this location is public Commit this code and submit a Pull Request From the URL in the Pull Request, PEcAn administrators will update the machines table, the status map, and notify other users to update their cron jobs (see Automation below) Plans to simplify this process are in the works "],["automation.html", "17.5 Automation", " 17.5 Automation Below is an example of a script to synchronize PEcAn database instances across the network. db.sync.sh #!/bin/bash ## make sure psql is in PATH export PATH=/usr/pgsql-9.3/bin/:$PATH ## move to export directory cd /fs/data3/sync ## Dump Data MYSITE=1 /home/dietze/pecan/scripts/dump.bety.sh ## Load Data from other sites MYSITE=1 REMOTESITE=2 /home/dietze/pecan/scripts/load.bety.sh MYSITE=1 REMOTESITE=5 /home/dietze/pecan/scripts/load.bety.sh MYSITE=1 REMOTESITE=0 /home/dietze/pecan/scripts/load.bety.sh ## Timestamp sync log echo $(date +%c) >> /home/dietze/db.sync.log Typically such a script is set up to run as a cron job. Make sure to schedule this job (crontab -e) as the user that has database privileges (typically postgres). The example below is a cron table that runs the sync every hour at 12 min after the hour. MAILTO=user@yourUniversity.edu 12 * * * * /home/dietze/db.sync.sh "],["database-maintentance.html", "17.6 Database maintentance", " 17.6 Database maintentance All databases need maintenance performed on them. Depending upon the database type this can happen automatically, or it needs to be run through a scheduler or manually. The BETYdb database is Postgresql and it needs to be reindexed and vacuumed on a regular basis. Reindexing introduces efficiencies back into the database by reorganizing the indexes. Vacuuming the database frees up resources to the database by rearranging and compacting the database. Both of these operations are necessary and safe. As always if there’s a concern, a backup of the database should be made ahead of time. While running the reindexing and vacuuming commands, users will notice a slowdown at times. Therefore it’s better to run these maintenance tasks during off hours. 17.6.1 Reindexing the database As mentioned above, reindexing allows the database to become more efficient. Over time as data gets updated and deleted, the indexes become less efficient. This has a negative inpact on executed statements. Reindexing makes the indexes efficient again (at least for a while) allowing faster statement execution and reducing the overall load on the database. The reindex.bety.sh script is provided to simplify reindexing the database. reindex.bety.sh -h ./reindex.bety.sh [-c datalog] [-d database] [-h] [-i table names] [-p psql options] [-q] [-s] [-t tablename] -c catalog, database catalog name used to search for tables, default is bety -d database, default is bety -h this help page -i table names, list of space-separated table names to skip over when reindexing -p additional psql command line options, default is -U bety -q the reindexing should be quiet -s reindex the database after reindexing the tables (this should be done sparingly) -t tablename, the name of the one table to reindex If the database is small enough it’s reasonable to reindex the entire database at one time. To do this manually run or schedule the REINDEX statement. For example: reindex.bety.sh -s For larger databases it may be desireable to reindex entire tables at a time. An efficient way to do this is to reindex the larger tables and then the entire database. For example: reindex.bety.sh -t traits; reindex.bety.sh -t yields; reindex.bety.sh -s For very large databases it may be desirable to reindex one or more individual indexes before reindexing tables and the databases. In this case running specific psql commands to reindex those specific indexes, followed by reindexing the table is a possible approach. For example: psql -U bety -c "REINDEX INDEX index_yields_on_citation_id; REINDEX INDEX index_yields_on_cultivar_id;" reindex.bety.sh -t yields; Splitting up the indexing commands over time allows the database to operate efficiently with minimal impact on users. One approach is to schedule the reindexing of large, complex tables at a spcific off-time during the week, followed by a general reindexing and excluding those large tables on a weekend night. Please refere to the Automation section above for information on using cron to schedule reindexing commands. 17.6.2 Vacuuming the database Vacuuming the BETYdb Postgresql database reduces the amount of resources it uses and introduces its own efficiencies. Over time, modified and deleted records leave ‘holes’ in the storage of the database. This is a common feature for most databases. Each database has its own way of handing this, in Postgresql it’s the VACUUM command. The VACUUM command performs two main operations: cleaning up tables to make memory use more efficient, and analyze tables for optimum statement execution. The use of the keyword ANALYZE indicates the second operation should take place. The vacuum.bety.sh script is provided to simplify vacuuming the database. vacuum.bety.db -h ./vacuum.bety.sh [-c datalog] [-d database] [-f] [-h] [-i table names] [-n] [-p psql options] [-q] [-s] [-t tablename] [-z] -c catalog, database catalog name used to search for tables, default is bety -d database, default is bety -f perform a full vacuum to return resources to the system. Specify rarely, if ever -h this help page -i table names, list of space-separated table names to skip over when vacuuming -n only vacuum the tables and do not analyze, default is to first vacuum and then analyze -p additional psql command line options, default is -U bety -q the export should be quiet -s skip vacuuming the database after vacuuming the tables -t tablename, the name of the one table to vacuum -z only perform analyze, do not perform a regular vacuum, overrides -n and -f, sets -s For small databases with light loads it may be possible to set aside a time for a complete vacuum. During this time, commands executed against the database might fail (a temporary condition as the database gets cleaned up). The following commands can be used to perform all the vaccum operations in one go. vacuum.bety.sh -f Generally it’s not desireable to have down time. If the system running the database doesn’t need resources that the database is using returned to it, a FULL vacuum can be avoided. This is the default behavior of the script vacuum.bety.sh In larger databases, vacuuming the entire database can take a long time causing a negative impact on users. This means that individual tables need to be vacuumed. How often a vacuum needs to be performed is dependent upon a table’s activity. The more frequently updates and deletes occur on a table, the more frequent the vaccum should be. For large tables it may be desireable to separate the table cleanup from the analysis. An example for completely vacuuming and analyzing a table is: psql -U bety -c "VACUUM traits; VACUUM ANALYZE traits;" Similar to indexes, vacuuming the most active tables followed by general database vacuuming and vacuum analyze may be a desireable approach. Also note that it isn’t necessary to run VACUUM ANALYZE for each vacuum performed. Separating the commands and performing a VACUUM ANALYZE after several regular vacuums may be sufficient, with less load on the database. If the BETYdb database is running on a system with limited resources, or with resources that have become limited, the VACCUM command can return resources to the system from the database. The normal vacuuming process releases resources back to the database for reuse, but not to the system; generally this isn’t a problem. Postgresql has a VACUUM keyword FULL that returns resources back to the system. Requesting a FULL vacuum will lock the table being vacuumed while it is being re-written preventing any statements from being executed against it. If performing VECUUM FULL against the entire database, only the table being actively worked on is locked. To minimize the impact a VACUUM FULL has on users, it’s best to perform a normal vacuum before a FULL vacuum. If this approach is taken, there sould be a minimal time gap between the normal VACUUM and the VACUUM FULL commands. A normal vacuum allows changes to be made thus requiring the full vacuum to handle those changes, extending it’s run time. Reducing the time between the two commands lessens the work VACUUM FULL needs to do. psql -U bety -c "VACUUM yields; VACUUM FULL yields; VACUUM ANALYZE yields;" Give its impact, it’s typically not desireable to perform a VACUUM FULL after every normal vacuum; it should be done on an “as needed” basis or infrequently. "],["troubleshooting-1.html", "17.7 Troubleshooting", " 17.7 Troubleshooting There are several possibilities if a scheduled cron job apepars to be running but isn’t producing the expected results. The following are suggestions on what to try to resolve the issue. 17.7.1 Username and password The user that scheduled a cron job may not have access permissions to the database. This can be easily confirmed by running the command line from the cron job while logged in as the user that scheduled the job. An error message will be shown if the user doesn’t have permissions. To resolve this, be sure to include a valid database user (not a BETYdb user) with their credentials on the command in crontab. 17.7.2 db_hba.conf file Iit’s possible that the machine hosting the docker image of the database doesn’t have permissions to access the database. This may due to the cron job running on a machine that is not the docker instance of the database. It may be necessary to look at the loga on the hosting machine to determine if database access permissions are causing a problem. Logs are stored in different locations depending upon the Operating System of the host and upon other environmental factors. This document doesn’t provide information on where to find the logs. To begin, it’s best to look at the contents of the relevent database configuration file. The following command will display the contents of the db_hba.conf file. psql -U postgres -qAt -c "show hba_file" | xargs grep -v -E '^[[:space:]]*#' This command should return a series of text lines. For each row except those begining with ‘local’, the fourth item describes the machines that can access the database. In some cases an IP mask is specified in the fifth that further restricts the machines that have access. The special work ‘all’ in the fourth column grants permissions to all machines. The last column on each line contains the authentication option for the machine(s) specified in the fourth column (with a possible fifth column IP mask modifier). Ensure that the host machine is listed under the fourth column (machine addresse range, or ‘all’), is also included in the IP mask if one was specified, and finally that any authentication option are not set to ‘reject’. If the host machine is not included the db_hba.conf file will need to be updated to allow access. "],["network-status-map.html", "17.8 Network Status Map", " 17.8 Network Status Map https://pecan2.bu.edu/pecan/status.php Nodes: red = down, yellow = out-of-date schema, green = good Edges: red = fail, yellow = out-of-date sync, green = good "],["tasks.html", "17.9 Tasks", " 17.9 Tasks Following is a list of tasks we plan on working on to improve these scripts: - pecanproject/bety#368 allow site-specific customization of information and UI elements including title, contacts, logo, color scheme. "],["standalone-tools-modules.html", "18 Standalone tools (modules)", " 18 Standalone tools (modules) Radiative transfer modeling and remote sensing (modules/rtm); vignette Photosynthesis (modules/photosynthesis); vignette Allometry (modules/allometry); vignette Load data (modules/benchmark – PEcAn.benchmark::load_data) "],["LoadData.html", "18.1 Loading Data in PEcAn", " 18.1 Loading Data in PEcAn If you are loading data in to PEcAn for benchmarking, using the Benchmarking shiny app [provide link?] is recommended. Data can be loaded manually using the load_data function which in turn requires providing data format information using query.format.vars and the path to the data using query.file.path. Below is a description of the load_data function an a simple example of loading data manually. 18.1.1 Inputs Required data.path: path to the data that is the output of the function query.file.path (see example below) format: R list object that is the output of the function query.format.vars (see example below) Optional start_year = NA: end_year = NA: site = NA vars.used.index=NULL 18.1.2 Output R data frame containing the requested variables converted in to PEcAn standard name and units and time steps in POSIX format. 18.1.3 Example The data for this example has already been entered in to the database. To add new data go to new data documentation. To load the Ameriflux data for the Harvard Forest (US-Ha1) site. Create a connection to the BETY database. This can be done using R function bety = PEcAn.DB::betyConnect(php.config = "pecan/web/config.php") where the complete path to the config.php is specified. See here for an example config.php file. Look up the inputs record for the data in BETY. To find the input ID, either look at The url of the record (see image above) In R run library(dplyr) input_name = "AmerifluxLBL_site_0-758" #copied directly from online input.id = tbl(bety,"inputs") %>% filter(name == input_name) %>% pull(id) Additional arguments to query.format.vars are optional If you only want to load a subset of dates in the data, specify start and end year, otherwise all data will be loaded. If you only want to load a select list of variables from the data, look up their IDs in BETY, otherwise all variables will be loaded. In R run format = PEcAn.DB::query.format.vars(bety, input.id) Examine the resulting R list object to make sure it returned the correct information. The example format contains the following objects: $file_name [1] "AMERIFLUX_BASE_HH" $mimetype [1] "csv" $skip [1] 2 $header [1] 1 $na.strings [1] "-9999" "-6999" "9999" "NA" $time.row [1] 4 $site [1] 758 $lat [1] 42.5378 $lon [1] -72.1715 $time_zone [1] "America/New_York" The first 4 rows of the table format$vars looks like this: bety_name variable_id input_name input_units storage_type column_number bety_units mstmip_name mstmip_units pecan_name pecan_units air_pressure 554 PA kPa 19 Pa Psurf Pa Psurf Pa airT 86 TA celsius 4 degrees C Tair K Tair K co2atm 135 CO2_1 umol mol-1 20 umol mol-1 CO2air micromol mol-1 CO2air micromol mol-1 datetime 5000000001 TIMESTAMP_START ymd_hms %Y%m%d%H%M 1 ymd_hms NA NA datetime ymd_hms Get the path to the data data.path = PEcAn.DB::query.file.path( input.id = input.id, host_name = PEcAn.remote::fqdn(), con = bety) Load the data data = PEcAn.benchmark::load_data(data.path = data.path, format = format) "],["remote-data-module.html", "18.2 Remote data module", " 18.2 Remote data module Remote data module retrieves remote sensing data from MODISTools, Google Earth Engine and AppEEARS, as well as from the NASA’s Land Processes Distributed Active Archive Center (LP DAAC) data server. For currently available R functions see here, and for the Python functions see RpTools. The downloaded data can be used while performing further analyses in PEcAn. 18.2.0.1 Google Earth Engine Google Earth Engine is a cloud-based platform for performing analysis on satellite data. It provides access to a large data catalog through an online JavaScript code editor and a Python API. Datasets currently available for use in PEcAn via Google Earth Engine are, Sentinel-2 MSI gee2pecan_s2(). It is possible to estimate Leaf Area Index (LAI) from Sentinel-2 data using the ESA SNAP algorithm. SMAP Global Soil Moisture Data gee2pecan_smap() Landsat 8 Surface Reflectance gee2pecan_l8() Global Forest Canopy Height, 2019 gee2pecan_gedi() 18.2.0.2 AppEEARS AppEEARS (Application for Extracting and Exploring Analysis Ready Samples) is an online tool which provides an easy to use interface for downloading analysis ready remote sensing data. Products available on AppEEARS. Note: AppEEARS uses a task based system for processing the data request, it is possible for a task to run for long hours before it gets completed. The module checks the task status after every 60 seconds and saves the files when the task gets completed. 18.2.0.3 LP DAAC Data Pool LP DAAC (Land Processes Distributed Active Archive Center) Data Pool is a NASA Earthdata login-enabled server located at the USGS Earth Resources Observation and Science (EROS) Center that archives and distributes land data products. Similar to AppEEARS, using this source also requires an Earthdata account (see below). Currently this pipeline is implemented and tested only for the GEDI dataset. For more information about the data you are downloading, including documentation and how to properly cite the data, please visit https://lpdaac.usgs.gov/. 18.2.0.4 Set-Up instructions (first time and one time only): Sign up for the Google Earth Engine. Follow the instructions here to sign up for using GEE. You need to have your own GEE account for using the GEE download functions in this module. Sign up for NASA Earthdata. Using AppEEARS and LP DAAC Data Pool requires an Earthdata account visit this page to create your own account. Install the RpTools package. Python codes required by this module are stored in a Python package named “RpTools” using this requires Python3 and the package manager pip3 to be installed in your system. To install the package, Navigate to pecan/modules/data.remote/inst/RpTools If you are inside the pecan directory, this can be done by, cd modules/data.remote/inst/RpTools Use pip3 to install the package. “-e” flag is used to install the package in an editable or develop mode, so that changes made to the code get updated in the package without reinstalling. pip3 install -e . Authenticate GEE. The GEE API needs to be authenticated using your credentials. The credentials will be stored locally on your system. This can be done by, #this will open a browser and ask you to sign in with the Google account registered for GEE earthengine authenticate Alternate way, python3 import ee ee.Authenticate() Save the Earthdata credentials. If you wish to use AppEEARS or LP DAAC data pool you will have to store your username and password inside a JSON file and then pass its file path as an argument in remote_process 18.2.0.5 Usage guide: This module is accesible using the R function remote_process which uses the Python package “RpTools” (located at data.remote/inst/RpTools) for downloading and processing data. RpTools has a function named rp_control which controls two other functions, get_remote_data which controls the scripts which are used for downloading data from the source. For example, gee2pecan_s2 downloads bands from Sentinel 2 using GEE. process_remote_data which controls the scripts responsible for processing the raw data. For example, bands2lai_snap uses the downloaded bands to compute LAI using the SNAP algorithm. Workflow of the module 18.2.1 Configuring remote_process remote_process is configured using remote data tags in the pecan.xml. The required tags are described below, <remotedata> <out_get_data>...</out_get_data> <source>...</source> <collection>...</collection> <scale>...</scale> <projection>...</projection> <qc>...</qc> <algorithm>...</algorithm> <credfile>...</credfile> <out_process_data>...</out_process_data> <overwrite>...</overwrite> </remotedata> out_get_data: (required) type of raw output requested, e.g, bands, smap source: (required) source of remote data, e.g., gee or appeears collection: (required) dataset or product name as it is provided on the source, e.g. “COPERNICUS/S2_SR” for gee or “SPL3SMP_E.003” for appeears scale: (optional) pixel resolution required for some gee collections, recommended to use 10 for Sentinel 2 scale Information about how GEE handles scale can be found out here projection: (optional) type of projection. Only required for appeears polygon AOI type qc: (optional) quality control parameter, required for some gee collections overwrite: (optional) if TRUE database checks will be skipped and existing data of same type will be replaced entirely. When processed data is requested, the raw data required for creating it will also be replaced. By default FALSE If you don’t want to enter your Earthdata credentials everytime you use AppEEARS or LP DAAC use the following tag too: credfile: (optional) absolute path to JSON file containing Earthdata username and password, only required when using AppEEARS and LP DAAC data pool. The contents of this file could be as simple as the following: { "username": "yourEARTHDATAusername", "password": "yourEARTHDATApassword" } These tags are only required if processed data (i.e. further processing after downloading the data) is requested: out_process_data: (optional) type of processed output requested, e.g, lai algorithm: (optional) algorithm used for processing data, currently only SNAP is implemented to estimate LAI from Sentinel-2 bands Additional information are taken from the registration files located at pecan/modules/data.remote/inst/registration, each source has its own registration file. This is so because there isn’t a standardized way to retrieve all image collections from GEE and each image collection may require its own way of performing quality checks, etc whereas all of the products available on AppEEARS can be retrieved using its API in a standardized way. GEE registration file (register.GEE.xml) : collection original_name original name of the image collection, e.g. COPERNICUS/S2_SR pecan_name short form of original name using which the collection is represented in PEcAn, e.g. s2 coord coord_type coordinate type supported by the collection scale the default value of the scale can be specified here qc the default value of the qc parameter can be specified here raw_format format details of the raw file id id of the format name name of the format mimetype MIME type pro_format format details of the processed file when the collection is used to create a processed file id id of the format name name of the format mimetype MIME type AppEEARS and LP DAAC Data Pool registration files (register.APPEEARS.xml and register.LPDAACDATAPOOL.xml) : coord coord_type coordinate type supported by the product raw_format format details of the output file id id of the format name name of the format mimetype MIME type Remaining input data: start date, end date: these are taken from the run tag in pecan.xml outdir: from the outdir tag in pecan.xml Area of interest: the coordinates and site name are found out from BETY using siteid present in the run tag. These are then used to create a GeoJSON file which is used by the download functions. The output data from the module are returned in the following tags: raw_id: input id of the raw file raw_path: absolute path to the raw file pro_id: input id of the processed file pro_path: absolute path to the processed file Output files: The output files are of netCDF type and are stored in a directory inside the specified outdir with the following naming convention: source_site_siteid The output files are created with the following naming convention: source_collection_scale_projection_qc_site_siteid_TimeStampOfFileCreation Whenever a data product is requested the output files are stored in the inputs table of BETYdb. Subsequently when the same product is requested again with a different date range but with the same qc, scale, projection the previous file in the db would be extended. The DB would always contain only one file of the same type. As an example, if a file containing Sentinel 2 bands for start date: 2018-01-01, end date: 2018-06-30 exists in the DB and the same product is requested again for a different date range one of the following cases would happen, New dates are ahead of the existing file: For example, if the requested dates are start: 2018-10-01, end: 2018-12-31 in this case the previous file will be extended forward meaning the effective start date of the file to be downloaded would be the day after the end date of the previous file record, i.e. 2018-07-01. The new and the previous file would be merged and the DB would now be having data for 2018-01-01 to 2018-12-31. New dates are preceding of the existing file: For example, if the requested dates are start: 2017-01-01, end: 2017-06-30 in this case the effective end date of the new download would be the day before the start date of the existing file, i.e., 2017-12-31. The new and the previous file would be merged and the file in the DB would now be having data for 2017-01-01 to 2018-06-30. New dates contain the date range of the existing file: For example, if the requested dates are start: 2016-01-01, end: 2019-06-30 here the existing file would be replaced entirely with the new file. A more efficient way of doing this could be to divide your request into two parts, i.e, first request for 2016-01-01 to 2018-01-01 and then for 2018-06-30 to 2019-06-30. When a processed data product such as SNAP-LAI is requested, the raw product (here Sentinel 2 bands) used to create it would also be stored in the DB. If the raw product required for creating the processed product already exists for the requested time period, the processed product would be created for the entire time period of the raw file. For example, if Sentinel 2 bands are present in the DB for 2017-01-01 to 2017-12-31 and SNAP-LAI is requested for 2017-03-01 to 2017-07-31, the output file would be containing LAI for 2017-01-01 to 2017-12-31. 18.2.1.1 Creating Polygon based sites A polygon site can be created in the BETYdb using the following way, PEcAn.DB::db.query("insert into sites (country, sitename, geometry) values ('country_name', 'site_name', ST_SetSRID(ST_MakePolygon(ST_GeomFromText('LINESTRING(lon lat elevation)')), crs));", con) Example, db.query("insert into sites (country, sitename, geometry) values ('FI', 'Qvidja_ca6cm', ST_SetSRID(ST_MakePolygon(ST_GeomFromText('LINESTRING(22.388957339620813 60.287395608412218 14.503780364990234, 22.389600591651835 60.287182336733203 14.503780364990234, 22.38705422266651 60.285516177775868 14.503780364990234, 22.386575219445195 60.285763643883932 14.503780364990234, 22.388957339620813 60.287395608412218 14.503780364990234 )')), 4326));", con) 18.2.1.2 Example use (GEE) This example will download Sentinel 2 bands and then use the SNAP algorithm to compute Leaf Area Index. Add remotedata tag to pecan.xml and configure it. <remotedata> <out_get_data>bands</out_get_data> <source>gee</source> <collection>COPERNICUS/S2_SR</collection> <scale>10</scale> <qc>1</qc> <algorithm>snap</algorithm> <out_process_data>LAI</out_process_data> </remotedata> Store the contents of pecan.xml in a variable named settings and pass it to remote_process. PEcAn.data.remote::remote_process(settings) The output netCDF files(bands and LAI) will be saved at outdir and their records would be kept in the inputs table of BETYdb. 18.2.1.3 Example use (AppEEARS) This example will download the layers of a SMAP product(SPL3SMP_E.003) Add remotedata tag to pecan.xml and configure it. <remotedata> <out_get_data>smap</out_get_data> <source>appeears</source> <collection>SPL3SMP_E.003</collection> <projection>native</projection> <algorithm></algorithm> <credfile>path/to/jsonfile/containingcredentials</credfile> </remotedata> Store the contents of pecan.xml in a variable named settings and pass it to remote_process. PEcAn.data.remote::remote_process(settings) The output netCDF file will be saved at outdir and its record would be kept in the inputs table of BETYdb. 18.2.1.4 Example use GEDI (LP DAAC data pool) <remotedata> <out_get_data>gedi</out_get_data> <source>lpdaacdatapool</source> <collection>GEDI02_B.002</collection> <credfile>path/to/jsonfile/containingcredentials</credfile> </remotedata> Store the contents of pecan.xml in a variable named settings and pass it to remote_process. PEcAn.data.remote::remote_process(settings) 18.2.1.5 Adding new GEE image collections Once you have the Python script for downloading the collection from GEE, please do the following to integrate it with this module. Make sure that the function and script names are same and named in the following way: gee2pecan_pecancodeofimagecollection pecancodeofimagecollection can be any name which you want to use for representing the collection is an easier way. Additionaly, ensure that the function accepts and uses the following arguments, geofile - (str) GeoJSON file containing AOI information of the site outdir - (str) path where the output file has to be saved start - (str) start date in the form YYYY-MM-DD end - (str) end date in the form YYYY-MM-DD scale and qc if applicable. Make sure the output file is of netCDF type and follows the naming convention described above. Store the Python script at pecan/modules/data.remote/inst/RpTools/RpTools Update the register.GEE.xml file. After performing these steps the script will be integrated with the remote data module and would be ready to use. "],["shiny.html", "19 Shiny ", " 19 Shiny "],["testing-the-shiny-server.html", "19.1 Testing the Shiny Server", " 19.1 Testing the Shiny Server Shiny can be difficult to debug because, when run as a web service, the R output is hidden in system log files that are hard to find and read. One useful approach to debugging is to use port forwarding, as follows. First, on the remote machine (including the VM), make sure R’s working directory is set to the directory of the Shiny app (e.g., setwd(/path/to/pecan/shiny/WorkflowPlots), or just open the app as an RStudio project). Then, in the R console, run the app as: shiny::runApp(port = XXXX) # E.g. shiny::runApp(port = 5638) Then, on your local machine, open a terminal and run the following command, matching XXXX to the port above and YYYY to any unused port on your local machine (any 4-digit number should work). ssh -L YYYY:localhost:XXXX <remote connection> # E.g., for the PEcAn VM, given the above port: # ssh -L 5639:localhost:5638 carya@localhost -p 6422 Now, in a web browser on your local machine, browse to localhost:YYYY (e.g., localhost:5639) to run whatever app you started with shiny::runApp in the previous step. All of the output should display in the R console where the shiny::runApp command was executed. Note that this includes any print, message, logger.*, etc. statements in your Shiny app. If the Shiny app hits an R error, the backtrace should include a line like Hit error at of server.R#LXX – that XX being a line number that you can use to track down the error. To return from the error to a normal R prompt, hit <Control>-C (alternatively, the “Stop” button in RStudio). To restart the app, run shiny::runApp(port = XXXX) again (keeping the same port). Note that Shiny runs any code in the pecan/shiny/<app> directory at the moment the app is launched. So, any changes you make to the code in server.R and ui.R or scripts loaded therein will take effect the next time the app is started. If for whatever reason this doesn’t work with RStudio, you can always run R from the command line. Also, note that the ability to forward ports (ssh -L) may depend on the ssh configuration of your remote machine. These instructions have been tested on the PEcAn VM (v.1.5.2+). "],["debugging-shiny-apps.html", "19.2 Debugging Shiny Apps", " 19.2 Debugging Shiny Apps When developing shiny apps you can run the application from rstudio and place breakpoints int he code. To do this you will need to do the following steps first (already done on the VM) before starting rstudio: - echo “options(shiny.port = 6438)” >> ${HOME}/.Rprofile - echo “options(shiny.launch.browser = ‘FALSE’)” >> ${HOME}/.Rprofile Next you will need to create a tunnel for port 6438 to the VM, which will be used to open the shiny app, the following command will creat this tunnel: ssh -l carya -p 6422 -L 6438:localhost:6438 localhost. Now you can from rstudio run your application using shiny::runApp() and it will show the output from the application in your console. You can now place breakpoints and evaluate the output. "],["checking-log-files.html", "19.3 Checking Log Files", " 19.3 Checking Log Files To create Log files on the VM, execute the following: sudo -s echo "preserve_logs true;" >> /etc/shiny-server/shiny-server.conf service shiny-server restart Then within the directory /var/log/shiny-server you will see log files for your specific shiny apps. "],["adding-to-pecan.html", "20 Adding to PEcAn", " 20 Adding to PEcAn Case studies Adding a model Adding input data Adding data through the web interface Adding new species, PFTs, and traits from a new site Add a site Add some species Add PFT Add trait data Adding a benchmark Adding a met driver Reference (How to edit records in bety) Models Species PFTs Traits Inputs DB files Variables Formats (Link each section to relevant Bety tables) "],["adding-model.html", "20.1 Adding An Ecosystem Model", " 20.1 Adding An Ecosystem Model Adding a model to PEcAn involves two activities: Updating the PEcAn database to register the model Writing the interface modules between the model and PEcAn Note that coupling a model to PEcAn should not require any changes to the model code itself. A key aspect of our design philosophy is that we want it to be easy to add models to the system and we want to using the working version of the code that is used by all other model users, not a special branch (which would rapidly end up out-of-date). 20.1.1 Using PEcAn Database To run a model within PEcAn requires that the PEcAn database has sufficient information about the model. This includes a MODEL_TYPE designation, the types of inputs the model requires, the location of the model executable, and the plant functional types used by the model. The instructions in this section assume that you will be specifying this information using the BETYdb web-based interface. This can be done either on your local VM (localhost:3280/bety or localhost:6480/bety) or on a server installation of BETYdb. However you interact with BETYdb, we encourage you to set up your PEcAn instance to support database syncs so that these changes can be shared and backed-up across the PEcAn network. The figure below summarizes the relevant database tables that need to be updated to add a new model and the primary variables that define each table. 20.1.2 Define MODEL_TYPE The first step to adding a model is to create a new MODEL_TYPE, which defines the abstract model class. This MODEL_TYPE is used to specify input requirements, define plant functional types, and keep track of different model versions. The MODEL_TYPE is created by selecting Runs > Model Type and then clicking on New Model Type. The MODEL_TYPE name should be identical to the MODEL package name (see Interface Module below) and is case sensitive. 20.1.3 MACHINE The PEcAn design acknowledges that the same model executables and input files may exist on multiple computers. Therefore, we need to define the machine that that we are using. If you are running on the VM then the local machine is already defined as pecan. Otherwise, you will need to select Runs > Machines, click New Machine, and enter the URL of your server (e.g. pecan2.bu.edu). 20.1.4 MODEL Next we are going to tell PEcAn where the model executable is. Select Runs > Files, and click ADD. Use the pull down menu to specify the machine you just defined above and fill in the path and name for the executable. For example, if SIPNET is installed at /usr/local/bin/sipnet then the path is /usr/local/bin/ and the file (executable) is sipnet. Now we will create the model record and associate this with the File we just registered. The first time you do this select Runs > Models and click New Model. Specify a descriptive name of the model (which doesn’t have to be the same as MODEL_TYPE), select the MODEL_TYPE from the pull down, and provide a revision identifier for the model (e.g. v3.2.1). Once the record is created select it from the Models table and click EDIT RECORD. Click on “View Related Files” and when the search window appears search for the model executable you just added (if you are unsure which file to choose you can go back to the Files menu and look up the unique ID number). You can then associate this Model record with the File by clicking on the +/- symbol. By contrast, clicking on the name itself will take you to the File record. In the future, if you set up the SAME MODEL VERSION on a different computer you can add that Machine and File to PEcAn and then associate this new File with this same Model record. A single version of a model should only be entered into PEcAn once. If a new version of the model is developed that is derived from the current version you should add this as a new Model record but with the same MODEL_TYPE as the original. Furthermore, you should set the previous version of the model as Parent of this new version. 20.1.5 FORMATS The PEcAn database keep track of all the input files passed to models, as well as any data used in model validation or data assimilation. Before we start to register these files with PEcAn we need to define the format these files will be in. To create a new format see Formats Documentation. 20.1.6 MODEL_TYPE -> Formats For each of the input formats you specify for your model, you will need to edit your MODEL_TYPE record to add an association between the format and the MODEL_TYPE. Go to Runs > Model Type, select your record and click on the Edit button. Next, click on “Edit Associated Formats” and choose the Format you just defined from the pull down menu. If the Input box is checked then all matching Input records will be displayed in the PEcAn site run selection page when you are defining a model run. In other words, the set of model inputs available through the PEcAn web interface is model-specific and dynamically generated from the associations between MODEL_TYPEs and Formats. If you also check the Required box, then the Input will be treated as required and PEcAn will not run the model if that input is not available. Furthermore, on the site selection webpage, PEcAn will filter the available sites and only display pins on the Google Map for sites that have a full set of required inputs (or where those inputs could be generated using PEcAn’s workflows). Similarly, to make a site appear on the Google Map, all you need to do is specify Inputs, as described in the next section, and the point should automatically appear on the map. 20.1.7 INPUTS After a file Format has been created then input files can be registered with the database. Creating Inputs can be found under How to insert new Input data. 20.1.8 Add Plant Functional Types (PFTs) Since many of the PEcAn tools are designed to keep track of parameter uncertainties and assimilate data into models, to use PEcAn with a model it is important to define Plant Functional Types for the sites or regions that you will be running the model. Create a new PFT entry by selecting Data > PFTs and then clicking on New PFT. Give the PFT a descriptive name (e.g., temperate deciduous). PFTs are MODEL_TYPE specific, so choose your MODEL_TYPE from the pull down menu. 20.1.8.1 Species Within PEcAn there are no predefined PFTs and user can create new PFTs very easily at whatever taxonomic level is most appropriate, from PFTs for individual species up to one PFT for all plants globally. To allow PEcAn to query its trait database for information about a PFT, you will want to associate species with the PFT record by choosing Edit and then “View Related Species”. Species can be searched for by common or scientific name and then added to a PFT using the +/- button. 20.1.8.2 Cultivars You can also define PFTs whose members are cultivars instead of species. This is designed for analyses where you want to want to perform meta-analysis on within-species comparisons (e.g. cultivar evaluation in an agricultural model) but may be useful for other cases when you want to specify different priors for some member of a species. You cannot associate both species and cultivars with the same PFT, but the cultivars in a cultivar PFT may come from different species, potentially including all known cultivars from some of the species, if you wish to and have thought about how to interpret the results. It is not yet possible to add a cultivar PFT through the BETYdb web interface. See this GithHub comment for an example of how to define one manually in PostgreSQL. 20.1.9 Adding Priors for Each Variable In addition to adding species, a PFT is defined in PEcAn by the list of variables associated with the PFT. PEcAn takes a fundamentally Bayesian approach to representing model parameters, so variables are not entered as fixed constants but as prior probability distributions. There are a wide variety of priors already defined in the PEcAn database that often range from very diffuse and generic to very informative priors for specific PFTs. These pre-existing prior distributions can be added to a PFT. Navigate to the PFT from Data > PFTs and selecting the edit button in the Actions column for the chosen PFT. Click on “View Related Priors” button and search through the list for desired prior distributions. The list can be filtered by adding terms into the search box. Add a prior to the PFT by clicking on the far left button for the desired prior, changing it to an X. Save this by scrolling to the bottom of the PFT page and hitting the Update button. 20.1.9.1 Creating new prior distributions A new prior distribution can be created for a pre-existing variable, if a more constrained or specific one is known. Select Data > Priors then “New Prior” In the Citation box, type in or select an existing reference that indicates how the prior was defined. There are a number of unpublished citations in current use that simply state the expert opinion of an individual Fill the Variable box by typing in part or all of a pre-existing variable’s name and selecting it The Phylogeny box allows one to specify what taxonomic grouping the prior is defined for, at it is important to note that this is just for reference and doesn’t have to be specified in any standard way nor does it have to be monophyletic (i.e. it can be a functional grouping) The prior distribution is defined by choosing an option from the drop-down Distribution box, and then specifying values for both Parameter a and Parameter b. The exact meaning of the two parameters depends on the distribution chosen. For example, for the Normal distribution a and b are the mean and standard deviation while for the Uniform they are the minimum and maximum. All parameters are defined based on their standard parameterization in the R language Specify the prior sample size in N if the prior is based on observed data (independent of data in the PEcAn database) When this is done, scroll down and hit the Create button The new prior distribution can then be added a PFT as described in the “Adding Priors for Each Variable” section. 20.1.9.2 Creating new variables It is important to note that the priors are defined for the variable name and units as specified in the Variables table. If the variable name or units is different within the model it is the responsibility of write.configs.MODEL function to handle name and unit conversions (see Interface Modules below). This can also include common but nonlinear transformations, such as converting SLA to LMA or changing the reference temperature for respiration rates. To add a new variable, select Data > Variables and click the New Variable button. Fill in the Name field with the desired name for the variable and the units in the Units field. There are additional fields, such as Standard Units, Notes, and Description, that can be filled out if desired. When done, hit the Create button. The new variable can be used to create a prior distribution for it as in the “Creating new prior distributions” section. 20.1.10 Interface Modules 20.1.10.1 Setting up the module directory (required) PEcAn assumes that the interface modules are available as an R package in the models directory named after the model in question. The simplest way to get started on that R package is to make a copy the template directory in the pecan/models folder and re-name it to the name of your model. In the code, filenames, and examples below you will want to substitute the word MODEL for the name of your model (note: R is case-sensitive). If you do not want to write the interface modules in R then it is fairly simple to set up the R functions describe below to just call the script you want to run using R’s system command. Scripts that are not R functions should be placed in the inst folder and R can look up the location of these files using the function system.file which takes as arguments the local path of the file within the package folder and the name of the package (typically PEcAn.MODEL). For example "],["example-met-conversion-wrapper-function.html", "20.2 Example met conversion wrapper function", " 20.2 Example met conversion wrapper function met2model.MODEL <- function(in.path, in.prefix, outfolder, start_date, end_date){ myMetScript <- system.file(“inst/met2model.MODEL.sh”, “PEcAn.MODEL”) system(paste(myMetScript, file.path(in.path, in.prefix), outfolder, start_date, end_date)) } would execute the following at the Linux command line inst/met2model.MODEL.sh in.path/in.prefix outfolder start_date end_date ` 20.2.0.1 DESCRIPTION Within the module folder open the DESCRIPTION file and change the package name to PEcAn.MODEL. Fill out other fields such as Title, Author, Maintainer, and Date. 20.2.0.2 NAMESPACE Open the NAMESPACE file and change all instances of MODEL to the name of your model. If you are not going to implement one of the optional modules (described below) at this time then you will want to comment those out using the pound sign #. For a complete description of R NAMESPACE files see here. If you create additional functions in your R package that you want to be used make sure you include them in the NAMESPACE as well (internal functions don’t need to be declared) 20.2.0.3 Building the package Once the package is defined you will then need to add it to the PEcAn build scripts. From the root of the pecan directory, go into the scripts folder and open the file build.sh. Within the section of code that includes PACKAGES= add model/MODEL to the list of packages to compile. If, in writing your module, you add any other R packages to the system you will want to make sure those are listed in the DESCRIPTION and in the script scripts/install.dependencies.R. Next, from the root pecan directory open all/DESCRIPTION and add your model package to the Suggests: list. At any point, if you want to check if PEcAn can build your MODEL package successfully, just go to the linux command prompt and run scripts/build.sh. You will need to do this before the system can use these packages. 20.2.0.4 write.config.MODEL (required) This module performs two primary tasks. The first is to take the list of parameter values and model input files that it receives as inputs and write those out in whatever format(s) the MODEL reads (e.g. a settings file). The second is to write out a shell script, jobs.sh, which, when run, will start your model run and convert its output to the PEcAn standard (netCDF with metadata currently equivalent to the MsTMIP standard). Within the MODEL directory take a close look at inst/template.job and the example write.config.MODEL to see an example of how this is done. It is important that this script writes or moves outputs to the correct location so that PEcAn can find them. The example function also shows an example of writing a model-specific settings/config file, also by using a template. You are encouraged to read the section above on defining PFTs before writing write.config.MODEL so that you understand what model parameters PEcAn will be passing you, how they will be named, and what units they will be in. Also note that the (optional) PEcAn input/driver processing scripts are called by separate workflows, so the paths to any required inputs (e.g. meteorology) will already be in the model-specific format by the time write.config.MODEL receives that info. 20.2.0.5 Output Conversions The module model2netcdf.MODEL converts model output into the PEcAn standard (netCDF with metadata currently equivalent to the MsTMIP standard). This function was previously required, but now that the conversion is called within jobs.sh it may be easier for you to convert outputs using other approaches (or to just directly write outputs in the standard). Whether you implement this function or convert outputs some other way, please note that PEcAn expects all outputs to be broken up into ANNUAL files with the year number as the file name (i.e. YEAR.nc), though these files may contain any number of scalars, vectors, matrices, or arrays of model outputs, such as time-series of each output variable at the model’s native timestep. Note: PEcAn reads all variable names from the files themselves so it is possible to add additional variables that are not part of the MsTMIP standard. Similarly, there are no REQUIRED output variables, though time is highly encouraged. We are shortly going establish a canonical list of PEcAn variables so that if users add additional output variables they become part of the standard. We don’t want two different models to call the same output with two different names or different units as this would prohibit the multi-model syntheses and comparisons that PEcAn is designed to facilitate. 20.2.0.6 met2model.MODEL met2model.MODEL(in.path, in.prefix, outfolder, start_date, end_date) Converts meteorology input files from the PEcAn standard (netCDF, CF metadata) to the format required by the model. This file is optional if you want to load all of your met files into the Inputs table as described in How to insert new Input data, which is often the easiest way to get up and running quickly. However, this function is required if you want to benefit from PEcAn’s meteorology workflows and model run cloning. You’ll want to take a close look at [Adding-an-Input-Converter] to see the exact variable names and units that PEcAn will be providing. Also note that PEcAn splits all meteorology up into ANNUAL files, with the year number explicitly included in the file name, and thus what PEcAn will actually be providing is in.path, the input path to the folder where multiple met files may stored, and in.prefix, the start of the filename that precedes the year (i.e. an individual file will be named <in.prefix>.YEAR.nc). It is valid for in.prefix to be blank. The additional REQUIRED arguments to met2model.MODEL are outfolder, the output folder where PEcAn wants you to write your meteorology, and start_date and end_date, the time range the user has asked the meteorology to be processed for. 20.2.0.7 Commit changes Once the MODEL modules are written, you should follow the Using-Git instructions on how to commit your changes to your local git repository, verify that PEcAn compiles using scripts/build.sh, push these changes to Github, and submit a pull request so that your model module is added to the PEcAn system. It is important to note that while we encourage users to make their models open, adding the PEcAn interface module to the Github repository in no way requires that the model code itself be made public. It does, however, allow anyone who already has a copy of the model code to use PEcAn so we strongly encourage that any new model modules be committed to Github. "],["NewInput.html", "20.3 Adding input data", " 20.3 Adding input data 20.3.1 Input records in BETY All model input data or data used for model calibration/validation must be registered in the BETY database. Before creating a new Input record, you must make sure that the format type of your data is registered in the database. If you need to make a new format record, see Creating a new format record in BETY. 20.3.2 Create a database file record for the input data An input record contains all the metadata required to identify the data, however, this record does not include the location of the data file. Since the same data may be stored in multiple places, every file has its own dbfile record. From your BETY interface: Create a DBFILES entry for the path to the file From the menu click RUNS then FILES Click “New File” Select the machine your file is located at Fill in the File Path where your file is located (aka folder or directory) NOT including the name of the file itself Fill in the File Name with the name of the file itself. Note that some types of input records will refer to be ALL the files in a directory and thus File Name can be blank Click Update 20.3.3 Creating a new Input record in BETY From your BETY interface: Create an INPUT entry for your data From the menu click RUNS then INPUTS Click “New Input” Select the SITE that this data is associated with the input data set Other required fields are a unique name for the input, the start and end dates of the data set, and the format of the data. If the data is not in a currently known format you will need to create a NEW FORMAT and possibly a new input converter. Instructions on how to do add a converter can be found here Input conversion. Instructions on how to add a format record can be found here Parent ID is an optional variable to indicated that one dataset was derived from another. Click “Create” Associate the DBFILE with the INPUT In the RUNS -> INPUTS table, search and find the input record you just created Click on the EDIT icon Select “View related Files” In the Search window, search for the DBFILE you just created Once you have found the DBFILE, click on the “+” icon to add the file Click on “Update” at the bottom when you are done. 20.3.4 Adding a new input converter Three Types of data conversions are discussed below: Meteorological data, Vegetation data, and Soil data. Each section provides instructions on how to convert data from their raw formats into a PEcAn standard format, whether it be from a database or if you have raw data in hand. Also, see PEcAn standard formats. 20.3.4.1 Meterological Data 20.3.4.1.1 Adding a function to PEcAn to convert a met data source In general, you will need to write a function to download the raw met data and one to convert it to the PEcAn standard. Downloading raw data function are named download.<source>.R. These functions are stored within the PEcAn directory: /modules/data.atmosphere/R. Conversion function from raw to standard are named met2CF.<source>.R. These functions are stored within the PEcAn directory: /modules/data.atmosphere/R. Current Meteorological products that are coupled to PEcAn can be found in our Available Meteorological Drivers page. Note: Unless you are also adding a new model, you will not need to write a script to convert from PEcAn standard to PEcAn models. Those conversion scripts are written when a model is added and can be found within each model’s PEcAn directory. Standards dimesion, names, nad units can be found here: Input Standards 20.3.4.1.2 Adding Single-Site Specific Meteorological Data Perhaps you have meteorological data specific to one site, with a unique format that you would like to add to PEcAn. Your steps would be to: 1. write a script or function to convert your files into the netcdf PEcAn standard 2. insert that file as an input record for your site following these instructions 20.3.4.1.3 Processing Met data outside of the workflow using PEcAn functions Perhaps you would like to obtain data from one of the sources coupled to PEcAn on its own. To do so you can run PEcAn functions on their own. 20.3.4.1.3.1 Example 1: Processing data from a database Download Amerifluxlbl from Niwot Ridge for the year 2004: raw.file <-PEcAn.data.atmosphere::download.AmerifluxLBL(sitename = "US-NR1", outfolder = ".", start_date = "2004-01-01", end_date = "2004-12-31") Using the information returned as the object raw.file you will then convert the raw files into a standard file. Open a connection with BETY. You may need to change the host name depending on what machine you are hosting BETY. You can find the hostname listed in the machines table of BETY. con <- PEcAn.DB::db.open( params = list( driver = RPostgres::Postgres(), dbname = 'bety', host ='localhost', user = "bety", password = "bety") ) Next you will set up the arguments for the function in.path <- '.' in.prefix <- raw.file$dbfile.name outfolder <- '.' format.id <- 5000000002 format <- PEcAn.DB::query.format.vars(format.id=format.id,bety = con) lon <- -105.54 lat <- 40.03 format$time_zone <- "America/Chicago" Note: The format.id can be pulled from the BETY database if you know the format of the raw data. Once these arguments are defined you can execute the met2CF.csv function PEcAn.data.atmosphere::met2CF.csv(in.path = in.path, in.prefix =in.prefix, outfolder = ".", start_date ="2004-01-01", end_date = "2004-12-01", lat= lat, lon = lon, format = format) 20.3.4.1.3.2 Example 2: Processing data from data already in hand If you have Met data already in hand and you would like to convert into the PEcAn standard follow these instructions. Update BETY with file record, format record and input record according to this page How to Insert new Input Data If your data is in a csv format you can use the met2CF.csvfunction to convert your data into a PEcAn standard file. Open a connection with BETY. You may need to change the host name depending on what machine you are hosting BETY. You can find the hostname listed in the machines table of BETY. con <- PEcAn.DB::db.open( params = list( driver = RPostgres::Postgres(), dbname = 'bety', host ='localhost', user = "bety", password = "bety") ) Prepare the arguments you need to execute the met2CF.csv function in.path <- 'path/where/the/raw/file/lives' in.prefix <- 'prefix_of_the_raw_file' outfolder <- 'path/to/where/you/want/to/output/thecsv/' format.id <- formatid of the format your created format <- PEcAn.DB::query.format.vars(format.id=format.id, bety = con) lon <- longitude of your site lat <- latitude of your site format$time_zone <- time zone of your site start_date <- Start date of your data in "y-m-d" end_date <- End date of your data in "y-m-d" Next you can execute the function: PEcAn.data.atmosphere::met2CF.csv(in.path = in.path, in.prefix =in.prefix, outfolder = ".", start_date = start_date, end_date = end_date, lat= lat, lon = lon, format = format) 20.3.4.2 Vegetation Data Vegetation data will be required to parameterize your model. In these examples we will go over how to produce a standard initial condition file. The main function to process cohort data is the ic_process.R function. As of now however, if you require pool data you will run a separate function, pool_ic_list2netcdf.R. 20.3.4.2.0.1 Example 1: Processing Veg data from data in hand. In the following example we will process vegetation data that you have in hand using PEcAn. First, you’ll need to create a input record in BETY that will have a file record and format record reflecting the location and format of your file. Instructions can be found in our How to Insert new Input Data page. Once you have created an input record you must take note of the input id of your record. An easy way to take note of this is in the URL of the BETY webpage that shows your input record. In this example we use an input record with the id 1000013064 which can be found at this url: https://psql-pecan.bu.edu/bety/inputs/1000013064# . Note that this is the Boston University BETY database. If you are on a different machine, your url will be different. With the input id in hand you can now edit a pecan XML so that the PEcAn function ic_process will know where to look in order to process your data. The inputs section of your pecan XML will look like this. As of now ic_process is set up to work with the ED2 model so we will use ED2 settings and then grab the intermediary Rds data file that is created as the standard PEcAn file. For your Inputs section you will need to input your input id wherever you see the useic flag. <inputs> <css> <source>FFT</source> <output>css</output> <username>pecan</username> <id>1000013064</id> <useic>TRUE</useic> <metadata> <trk>1</trk> <age>70</age> <area>400</area> </metadata> </css> <pss> <source>FFT</source> <output>pss</output> <username>pecan</username> <id>1000013064</id> <useic>TRUE</useic> </pss> <site> <source>FFT</source> <output>site</output> <username>pecan</username> <id>1000013064</id> <useic>TRUE</useic> </site> <met> <source>CRUNCEP</source> <output>ED2</output> </met> <lu> <id>294</id> </lu> <soil> <id>297</id> </soil> <thsum> <id>295</id> </thsum> <veg> <id>296</id> </veg> </inputs> This IC workflow also supports generating ensembles of initial conditions from posterior estimates of DBH. To do this the tags below can be inserted to the pecan.xml: <css> <source>PalEON</source> <output>css</output> <id>1000015682</id> <useic>TRUE</useic> <ensemble>20</ensemble> <metadata> <area>1256.637</area> <n.patch>3</n.patch> </metadata> </css> Here the id should point to a file that has MCMC samples to generate the ensemble from. The number between the <ensemble> tag defines the number of ensembles requested. The workflow will populate the settings list run$inputs tag with ensemble member information. E.g.: <inputs> <css> <path1>...</path1> <path2>...</path2> <path3>...</path3> ... <pathN>...</pathN> </css> <pss> <path> <path1>...</path1> <path2>...</path2> <path3>...</path3> ... <pathN>...</pathN> </path> </pss> <site> <path> <path1>...</path1> <path2>...</path2> <path3>...</path3> ... <pathN>...</pathN> </path> </site> <met>...</met> <lu>...</lu> <soil>...</soil> <thsum>...</thsum> <veg>...</veg> </inputs> Once you edit your PEcAn.xml you can than create a settings object using PEcAn functions. Your pecan.xml must be in your working directory. settings <- PEcAn.settings::read.settings("pecan.xml") settings <- PEcAn.settings::prepare.settings(settings, force=FALSE) You can then execute the ic_process function to convert data into a standard Rds file: input <- settings$run$inputs dir <- "." ic_process(settings, input, dir, overwrite = FALSE) Note that the argument dir is set to the current directory. You will find the final ED2 file there. More importantly though you will find the .Rds file within the same directory. 20.3.4.2.0.2 Example 3 Pool Initial Condition files If you have pool vegetation data, you’ll need the pool_ic_list2netcdf.R function to convert the pool data into PEcAn standard. The function stands alone and requires that you provide a named list of netcdf dimensions and values, and a named list of variables and values. Names and units need to match the standard_vars.csv table found here. #Create a list object with necessary dimensions for your site input<-list() dims<- list(lat=-115,lon=45, time= 1) variables<- list(SoilResp=8,TotLivBiom=295) input$dims <- dims input$vals <- variables Once this is done, set outdir to where you’d like the file to write out to and a siteid. Siteid in this can be used as an file name identifier. Once part of the automated workflow siteid will reflect the site id within the BET db. outdir <- "." siteid <- 772 pool_ic_list2netcdf(input = input, outdir = outdir, siteid = siteid) You should now have a netcdf file with initial conditions. 20.3.4.3 Soil Data 20.3.4.3.0.1 Example 1: Converting Data in hand Local data that has the correct names and units can easily be written out in PEcAn standard using the function soil2netcdf. soil.data <- list(volume_fraction_of_sand_in_soil = c(0.3,0.4,0.5), volume_fraction_of_clay_in_soil = c(0.3,0.3,0.3), soil_depth = c(0.2,0.5,1.0)) soil2netcdf(soil.data,"soil.nc") At the moment this file would need to be inserted into Inputs manually. By default, this function also calls soil_params, which will estimate a number of hydraulic and thermal parameters from texture. Be aware that at the moment not all model couplers are yet set up to read this file and/or convert it to model-specific formats. 20.3.4.3.0.2 Example 2: Converting PalEON data In addition to location-specific soil data, PEcAn can extract soil texture information from the PalEON regional soil product, which itself is a subset of the MsTMIP Unified North American Soil Map. If this product is installed on your machine, the appropriate step in the do_conversions workflow is enabled by adding the following tag under <inputs> in your pecan.xml <soil> <id>1000012896</id> </soil> In the future we aim to extend this extraction to a wider range of soil products. 20.3.4.3.0.3 Example 3: Extracting soil properties from gSSURGO database In addition to location-specific soil data, PEcAn can extract soil texture information from the gSSURGO data product. This product needs no installation and it extract soil proeprties for the lower 48 states in U.S. In order to let the pecan know that you’re planning to use gSSURGO, you can the following XML tag under input in your pecan xml file. <inputs> <soil> <source>gSSURGO</source> </soil> </inputs> "],["adding-data-web.html", "20.4 Pecan Data Ingest via Web Interface", " 20.4 Pecan Data Ingest via Web Interface This tutorial explains the process of ingesting data into PEcAn via our Data-Ingest Application. In order to ingest data, the users must first select data that they wish to upload. Then, they enter metadata to help PEcAn parse and load the data into the main PEcAn workflow. 20.4.1 Loading Data 20.4.1.1 Selecting Ingest Method The Data-Ingest application is capable of loading data from the DataONE data federation and from the user’s local machine. The first step in the workflow is therefore to select an upload method. The application defaults to uploading from DataONE. To upload data from a local device, simply select the radio button titled Local Files. 20.4.1.2 DataONE Upload Example The DataONE download feature allows the user to download data at a given doi or DataONE specific package id. To do so, enter the doi or identifier in the Import From DataONE field and select download. The download process may take a couple of minutes to run depending on the number of files in the dataONE package. This may be a convenient option if the user does not wish to download files directly to their local machine. Once the files have been successfully downloaded from DataONE, they are displayed in a table. Before proceeding to the next step, the user can select a file to ingest by clicking on the corresponding row in the data table. 20.4.2 Local Upload Example To upload local files, the user should first select the Local Files button. From there, the user can upload files from their local machines by selecting Browse or by dragging and dropping files into the text box. The files will begin uploading automatically. From there, the user should select a file to ingest and then select the Next Step button. After this step, the workflow is identical for both methods. However, please note that if it becomes necessary to switch from loading data via DataONE to uploading local files after the first step, please restart the application. ### 2. Creating an Input Record Creating an input record requires some basic metadata about the file that is being ingested. Each entry field is briefly explained below. Site: To link the selected file with a site, the user can scroll or type to search all the sites in PEcAn. See Example: Parent: To link the selected file with another dataset, type to search existing datasets in the Parent field. Name: this field should be autofilled by selecting a file in step 1. Format: If the selected file has an existing format name, the user can search and select in the Format field. If the selected file’s format is not already in pecan, the user can create a new format by selecting Create New Format. Once this new format is created, it will automatically populate the Format box and the Current Mimetype box (See Section 3). Mimetype: If the format already exists, select an existing mimetype. Start and End Date and Time: Inputs can be entered manually or by using the user interface. See example Notes: Describe the data that is being uploaded. Please include any citations or references. 20.4.3 3. Creating a format record If it is necessary to add a new format to PEcAn, the user should fill out the form attached to the Create New Format button. The inputs to this form are described below: Mimetype: type to search existing mimetypes. If the mimetype is not in that list, please click on the link Create New Mimetype and create a new mimetype via the BETY website. New Format Name: Add the name of the new format. Please exclude spaces from the name. Instead please use underscores “_“. Header: If there is space before the first line of data in the dataset, please select Yes Skip: The number of lines in the header that should be skipped before the data. Please enter notes that describe the format. Example: ### 4. Formats_Variables Record The final step in the ingest process is to register a formats-variables record. This record links pecan variables with variables from the selected data. Variable: PEcAn variable that is equivalent to variable in selected file. Name: The variable name in the imported data need only be specified if it differs from the BETY variable name. Unit: Should be in a format parseable by the udunits library and need only be secified if the units of the data in the file differ from the BETY standard. Storage Type: Storage type need only be specified if the variable is stored in a format other than would be expected (e.g. if numeric values are stored as quoted character strings). Additionally, storage_type stores POSIX codes that are used to store any time variables (e.g. a column with a 4-digit year would be %Y). Column Number: Vector of integers that list the column numbers associated with variables in a dataset. Required for text files that lack headers. Finally, the path to the ingest data is displayed in the Select Files box. "],["NewFormat.html", "20.5 Creating a new format", " 20.5 Creating a new format 20.5.1 Formats in BETY The PEcAn database keeps track of all the input files passed to models, as well as any data used in model validation or data assimilation. Before we start to register these files with PEcAn we need to define the format these files will be in. The main goal is to take all the meta-data we have about a data file and create a record of it that pecan can use as a guide when parsing the data file. This information is stored in a Format record in the bety database. Make sure to read through the current Formats before deciding to make a new one. 20.5.2 Creating a new format in BETY If the Format you are looking for is not available, you will need to create a new record. Before entering information into the database, you need to be able to answer the following questions about your data: What is the file MIME type? We have a suit of functions for loading in data in open formats such as CSV, txt, netCDF, etc. PEcAn has partnered with the NCSA BrownDog project to create a service that can read and convert as many data formats as possible. If your file type is less common or a proprietary type, you can use the BrownDog DAP to convert it to a format that can be used with PEcAn. If BrownDog cannot convert your data, you will need to contact us about writing a data specific load function. What variables does the file contain? What are the variables named? What are the variable units? How do the variable names and units in the data map to PEcAn variables in the BETY database? See below for an example. It is most likely that you will NOT need to add variables to BETY. However, identifying the appropriate variables matches in the database may require some work. We are always available to help answer your questions. Is there a timestamp on the data? What are the units of time? Here is an example using a fake dataset: example_data This data started out as an excel document, but was saved as a CSV file. To create a Formats record for this data, in the web interface of BETY, select Runs > Formats and click New Format. You will need to fill out the following fields: MIME type: File type (you can search for other formats in the text field) Name: The name of your format (this can be whatever you want) Header: Boolean that denotes whether or not your data contains a header as the first line of the data. (1 = TRUE, 0 = FALSE) Skip: The number of lines above the data that should be skipped. For example, metadata that should not be included when reading in the data or blank spaces. Notes: Any additional information about the data such as sources and citations. Here is the Formats record for the example data: format_record_1 When you have finished this section, hit Create. The final record will be displayed on the screen. 20.5.2.1 Formats -> Variables After a Format entry has been created, you are encouraged to edit the entry to add relationships between the file’s variables and the Variables table in PEcAn. Not only do these relationships provide meta-data describing the file format, but they also allow PEcAn to search and (for some MIME types) read files. To enter this data, select Edit Record and on the edit screen select View Related Variable. Here is the record for the example data after adding related variables: format_record_2 20.5.2.1.1 Name and Unit For each variable in the file you will want at a minimum to specify the NAME of the variable within your file and match that to the equivalent Variable in the pulldown. Make sure to search for your variables under Data > Variables before suggesting that we create a new variable record. This may not always be a straightforward process. For example bety contains a record for Net Primary Productivity: var_record This record does not have the same variable name or the same units as NPP in the example data. You may have to do some reading to confirm that they are the same variable. In this case - Both the data and the record are for Net Primary Productivity (the notes section provides additional resources for interpreting the variable.) - The units of the data can be converted to those of the vairiable record (this can be checked by running udunits2::ud.are.convertible(\"g C m-2 yr-1\", \"Mg C ha-1 yr-1\")) Differences between the data and the variable record can be accounted for in the data Formats record. Under Variable, select the variable as it is recorded in bety. Under Name, write the name the variable has in your data file. Under Unit, write the units the variable has in your data file. NOTE: All units must be written in a udunits compliant format. To check that your units can be read by udunits, in R, load the udunits2 package and run udunits2::is.parseable(\"g C m-2 yr-1\") If the name or the units are the same, you can leave the Name and Unit fields blank. This is can be seen with the variable LAI. 20.5.2.1.2 Storage Type Storage Type only needs to be specified if the variable is stored in a format other than what would be expected (e.g. if numeric values are stored as quoted character strings). One such example is time variables. PEcAn converts all dates into POSIX format using R functions such as strptime. These functions require that the user specify the format in which the date is written. The default is \"%Y-%m-%d %H:%M:%S\" which would look like \"2017-01-01 00:00:00\" A list of date formats can be found in the R documentation for the function strptime Below are some commonly used codes: %d Day of the month as decimal number (01–31). %D Date format such as %m/%d/%y. %H Hours as decimal number (00–23). %m Month as decimal number (01–12). %M Minute as decimal number (00–59). %S Second as integer (00–61), allowing for up to two leap-seconds (but POSIX-compliant implementations will ignore leap seconds). %T Equivalent to %H:%M:%S. %y Year without century (00–99). On input, values 00 to 68 are prefixed by 20 and 69 to 99 by 19 – that is the behaviour specified by the 2004 and 2008 POSIX standards, but they do also say ‘it is expected that in a future version the default century inferred from a 2-digit year will change’. %Y Year with century. 20.5.2.1.3 Column Number If your data is in text format with variables in a standard order then you can specify the Column Number for the variable. This is required for text files that lack headers. 20.5.2.2 Retrieving Format Information To acquire Format information from a Format record, use the R function query.format.vars 20.5.2.2.1 Inputs bety: connection to BETY input.id=NA and/or format.id=NA: Input or Format record ID from BETY At least one must be specified. Defaults to format.id if both provided. var.ids=NA: optional vector of variable IDs. If provided, limits results to these variables. 20.5.2.2.2 Output R list object containing many things. Fill this in. "],["NewBenchmark.html", "20.6 Creating a new benchmark reference run", " 20.6 Creating a new benchmark reference run The purpose of the reference run record in BETY is to store all the settings from a run that are necessary in exactly recreating it. The pecan.xml file is the home of absolutely all the settings for a particular run in pecan. However, much of the information in the pecan.xml file is server and user specific and more importantly, the pecan.xml files are stored on individual servers and may not be available to the public. When a run that is performed using pecan is registered as a reference run, the settings that were used to make that run are made available to all users through the database. All completed runs are not automatically registered as reference runs. To register a run, navigate to the benchmarking section of the workflow visualizations Shiny app. "],["editing-records.html", "20.7 Editing records", " 20.7 Editing records Models Species PFTs Traits Inputs DB files Variables Formats (Link each section to relevant Bety tables) "],["troubleshooting-and-debugging-pecan.html", "21 Troubleshooting and Debugging PEcAn ", " 21 Troubleshooting and Debugging PEcAn "],["cookies-and-pecan-web-pages.html", "21.1 Cookies and pecan web pages", " 21.1 Cookies and pecan web pages You may need to disable cookies specifically for the pecan webserver in your browser. This shouldn’t be a problem running from the virtual machine, but your installation of php can include a ‘PHPSESSID’ that is quite long, and this can overflow the params field of the workflows table, depending on how long your hostname, model name, site name, etc are. "],["warning-mkdir-function.mkdir-no-such-file-or-directory.html", "21.2 Warning: mkdir() [function.mkdir]: No such file or directory", " 21.2 Warning: mkdir() [function.mkdir]: No such file or directory If you are seeing: Warning: mkdir() [function.mkdir]: No such file or directory in /path/to/pecan/web/runpecan.php at line 169 it is because you have used a relative path for $output_folder in system.php. "],["after-creating-a-new-pft-the-tag-for-pft-not-passed-to-config.xml-in-ed.html", "21.3 After creating a new PFT the tag for PFT not passed to config.xml in ED", " 21.3 After creating a new PFT the tag for PFT not passed to config.xml in ED This is a result of the rather clunky way we currently have adding PFTs to PEcAn. This is happening because you need to edit the ./pecan/models/ed/data/pftmapping.csv file to include your new PFTs. This is what the file looks like: PEcAn;ED ebifarm.acru;11 ebifarm.acsa3;11 ... You just need to edit this file (in a text editor, no Excel) and add your PFT names and associated number to the end of the file. Once you do this, recompile PEcAn and it should then work for you. We currently need to reference this file in order to properly set the PFT number and maintain internal consistency between PEcAn and ED2. "],["debugging.html", "21.4 Debugging", " 21.4 Debugging How to identify the source of a problem. 21.4.1 Using tests/workflow.R This script, along with model-specific settings files in the tests folder, provide a working example. From inside the tests folder, R CMD --vanilla -- --settings pecan.<model>.xml < workflow.R should work. The next step is to add debugonce(<broken.function.name>) before running the test workflow. This allows you can step through the function and evaluate the different objects as they are created and/or transformed. See tests README for more information. "],["useful-scripts.html", "21.5 Useful scripts", " 21.5 Useful scripts The following scripts (in qaqc/vignettes identify, respectively: relationships among functions across packages function inputs and outputs (e.g. that will identify which functions and outputs are used in a workflow). "],["database.html", "22 BETY Database Administration", " 22 BETY Database Administration This section provides additional details about the BETY database used by PEcAn. It will discuss best practices for setting up the BETY database, how to backup the database and how to restore the database. "],["database-setup.html", "22.1 Best practices", " 22.1 Best practices When using the BETY database in non testing mode, it is best not to use the default users. This is accomplished when running the initialize of the database. When the database is initally created the database will be created with some default users (best known is the carya user) as well as the guestuser that can be used in the BETY web application. To disable these users you will either need to disable the users from the web interface, or you can reinitialize the database and remove the -u flag from the command line (the -u flag will create the default users). To disable the guestuser as well you can remove the -g flag from the command line, or disable the account from BETY. The default installation of BETY and PEcAn will assume there is a database called bety with a default username and password. The default installation will setup the database account to not have any superuser abilities. It is also best to limit access to the postgres database from trusted hosts, either by using firewalls, or configuring postgresql to only accept connections from a limited set of hosts. "],["backup-of-bety-database.html", "22.2 Backup of BETY database", " 22.2 Backup of BETY database It is good practice to make sure you backup the BETY database. Just creating a copy of the files on disk is not enough to ensure you have a valid backup. Most likely if you do this you will end up with a corrupted backup of the database. To backup the database you can use the pg_dump command, which will make sure the database id backed up in a consistent state. You can run sudo -u postgres pg_dump -d bety -Z 9 -f bety.sql.gz, this will create a compressed file that can be used to resotore the database. In the PEcAn distribution in scripts folder there is a script called backup.bety.sh. This script will create the backup of the database. It will create multiple backups allowing you to restore the database from one of these copies. The database will be backed up to one of the following files: - bety-d-X, daily backup, where X is the day of the month. - bety-w-X, weekly backup, where X is the week number in the year - bety-m-X, montly backup, where X is the month of the year - bety-y-X, yearly backup, where X is the actual year. Using this scheme, we can restore the database using any of the files generated. It is recommeneded to run this script using a cronjob at midnight such that you have a daily backup of the database and do not have to remember to create these backups. When running this script (either cron or by hand) make sure to place the backups on a different machine than the machine that holds the database in case of a larger system failure. "],["restore-of-bety-database.html", "22.3 Restore of BETY database", " 22.3 Restore of BETY database Hopefully this section will never need to be used. Following are 5 steps that have been used to restore the database. Before you start it is worth it to read up online a bit on restoring the database as well as join the slack channel and ask any of the people there for help. stop apache (BETY/PEcAn web apps) service httpd stop or service apache2 stop backup database (you know just incase) pg_dump -d bety > baddb.sql drop database sudo -u postgres psql -c 'drop database bety' create database sudo -u postgres psql -c 'create database bety with owner bety' load database (assuming dump is called bety.sql.gz) zcat bety.sql.gz | grep -v search_path | sudo -u postgres psql -d bety start apache again service httpd start or service apache2 start If during step 5 there is a lot of errors, it is helpful to add -v ON_ERROR_STOP=1 to the end of the command. This will stop the restore at the first error and will help with debugging the issue. "],["workflow-modules.html", "23 Workflow modules", " 23 Workflow modules NOTE: As of PEcAn 1.2.6 – needs to be updated significantly "],["overview-1.html", "23.1 Overview", " 23.1 Overview Workflow inputs and outputs (click to open in new page, then zoom). Code used to generate this image is provided in qaqc/vignettes/module_output.Rmd "],["load-settings.html", "23.2 Load Settings", " 23.2 Load Settings 23.2.1 read.settings(\"/home/pecan/pecan.xml\") loads settings create directories generates new xml, put in output folder "],["query-database.html", "23.3 Query Database", " 23.3 Query Database 23.3.1 get.trait.data() Queries the database for both the trait data and prior distributions associated with the PFTs specified in the settings file. The list of variables that are queried is determined by what variables have priors associated with them in the definition of the pft. Likewise, the list of species that are associated with a PFT determines what subset of data is extracted out of all data matching a given variable name. "],["meta-analysis.html", "23.4 Meta Analysis", " 23.4 Meta Analysis 23.4.1 run.meta.analysis() The meta-analysis code begins by distilling the trait.data to just the values needed for the meta-analysis statistical model, with this being stored in madata.Rdata. This reduced form includes the conversion of all error statistics into precision (1/variance), and the indexing of sites, treatments, and greenhouse. In reality, the core meta-analysis code can be run independent of the trait database as long as input data is correctly formatted into the form shown in madata. The evaluation of the meta-analysis is done using a Bayesian statistical software package called JAGS that is called by the R code. For each trait, the R code will generate a [trait].model.bug file that is the JAGS code for the meta-analysis itself. This code is generated on the fly, with PEcAn adding or subtracting the site, treatment, and greenhouse terms depending upon the presence of these effects in the data itself. Meta-analyses are run, and summary plots are produced. "],["write-configuration-files.html", "23.5 Write Configuration Files", " 23.5 Write Configuration Files 23.5.1 write.configs(model) writes out a configuration file for each model run ** writes 500 configuration files for a 500 member ensemble ** for n traits, writes 6 * n + 1 files for running default Sensitivity Analysis (number can be changed in the pecan settings file) "],["start-runs.html", "23.6 Start Runs", " 23.6 Start Runs 23.6.1 start.runs(model) This code starts the model runs using a model specific run function named start.runs.model. If the ecosystem model is running on a remote server, this module also takes care of all of the communication with the remote server and its run queue. Each of your subdirectories should now have a [run.id].out file in it. One instance of the model is run for each configuration file generated by the previous write configs module. "],["get-model-output.html", "23.7 Get Model Output", " 23.7 Get Model Output 23.7.1 get.model.output(model) This code first uses a model-specific model2netcdf.model function to convert the model output into a standard output format (MsTMIP). Then it extracts the data for requested variables specified in the settings file as settings$ensemble$variable, averages over the time-period specified as start.date and end.date, and stores the output in a file output.Rdata. The output.Rdata file contains two objects, sensitivity.output and ensemble.output, that is the model prediction for the parameter sets specified in sa.samples and ensemble.samples. In order to save bandwidth, if the model output is stored on a remote system PEcAn will perform these operations on the remote host and only return the output.Rdata object. "],["ensemble-analysis.html", "23.8 Ensemble Analysis", " 23.8 Ensemble Analysis 23.8.1 run.ensemble.analysis() This module makes some simple graphs of the ensemble output. Open ensemble.analysis.pdf to view the ensemble prediction as both a histogram and a boxplot. ensemble.ts.pdf provides a timeseries plot of the ensemble mean, meadian, and 95% CI "],["sensitivity-analysis-variance-decomposition.html", "23.9 Sensitivity Analysis, Variance Decomposition", " 23.9 Sensitivity Analysis, Variance Decomposition 23.9.1 run.sensitivity.analysis() This function processes the output of the previous module into sensitivity analysis plots, sensitivityanalysis.pdf, and a variance decomposition plot, variancedecomposition.pdf . In the sensitivity plots you will see the parameter values on the x-axis, the model output on the Y, with the dots being the model evaluations and the line being the spline fit. The variance decomposition plot is discussed more below. For your reference, the R list object, sensitivity.results, stored in sensitivity.results.Rdata, contains all the components of the variance decomposition table, as well as the the input parameter space and splines from the sensitivity analysis (reminder: the output parameter space from the sensitivity analysis was in outputs.R). The variance decomposition plot contains three columns, the coefficient of variation (normalized posterior variance), the elasticity (normalized sensitivity), and the partial standard deviation of each model parameter. This graph is sorted by the variable explaining the largest amount of variability in the model output (right hand column). From this graph identify the top-tier parameters that you would target for future constraint. "],["glossary.html", "23.10 Glossary", " 23.10 Glossary Inputs: data sets that are used, and file paths leading to them Parameters: e.g. info set in settings file Outputs: data sets that are dropped, and the file paths leading to them "],["installation-details.html", "24 Installation details", " 24 Installation details This chapter contains details about installing and maintaining the uncontainerized version of PEcAn on a virtual machine or a server. If you are running PEcAn inside of Docker, many of the particulars will be different and you should refer to the docker chapter instead of this one. "],["pecanvm.html", "24.1 PEcAn Virtual Machine", " 24.1 PEcAn Virtual Machine See also other VM related documentation sections: Maintaining your PEcAn VM Connecting to the VM via SSH Connecting to bety on the VM via SSh Using Amazon Web Services for a VM (AWS) Creating a Virtual Machine VM Desktop Conversion Install RStudio Desktop The PEcAn virtual machine consists of all of PEcAn pre-compiled within a Linux operating system and saved in a “virtual machine” (VM). Virtual machines allow for running consistent set-ups without worrying about differences between operating systems, library dependencies, compiling the code, etc. Install VirtualBox This is the software that runs the virtual machine. You can find the download link and instructions at http://www.virtualbox.org. NOTE: On Windows you may see a warning about Logo testing, it is okay to ignore the warning. Download the PEcAn VM You can find the download link at http://opensource.ncsa.illinois.edu/projects/artifacts.php?key=PECAN, under the “Files” header. Click the “.ova” file to begin the download. Note that the file is ~7 GB, so this download can take several minutes to hours depending on your connection speed. Also, the VM requires >4 GB of RAM to operate correctly. Please check current usage of RAM and shutdown processes as needed. Import the VM Once the download is complete, open VirtualBox. In the VirtualBox menus, go to “File” → “Import Appliance” and locate the downloaded “.ova” file. For Virtualbox version 5.x: In the Appliance Import Settings, make sure you select “Reinitialize the MAC address of all network cards” (picture below). This is not selected by default and can result in networking issues since multiple machines might claim to have the same network MAC Address. For Virtualbox versions starting with 6.0, there is a slightly different interface (see figure). Select “Generate new MAC addresses for all network adapters” from the MAC Address Policy: NOTE: If you experience network connection difficulties in the VM with this enabled, try re-importing the VM without this setting selected). Finally, click “Import” to build the Virtual Machine from its image. Launch PEcAn Double click the icon for the PEcAn VM. A terminal window will pop up showing the machine booting up which may take a minute. It is done booting when you get to the pecan login: prompt. You do not need to login as the VM behaves like a server that we will be accessing through you web browser. Feel free to minimize the VM window. If you do want to login to the VM, the credentials are as follows: username: carya, password: illinois (after the pecan tree, [Carya illinoinensis][pecan-wikipedia]). Open the PEcAn web interface With the VM running in the background, open any web browser on the same machine and navigate to localhost:6480/pecan/ to start the PEcAn workflow. (NOTE: The trailing backslash may be necessary depending on your browser) To ssh into the VM, open up a terminal on your machine and execute ssh -l carya -p 6422 localhost. Username and password are the same as when you log into the machine. "],["aws-setup.html", "24.2 AWS Setup", " 24.2 AWS Setup ***********Mirror of earlier section in installation section?********************* The following are Mike’s rough notes from a first attempt to port the PEcAn VM to the AWS. This was done on a Mac These notes are based on following the instructions here 24.2.1 Convert PEcAn VM AWS allows upload of files as VMDK but the default PEcAn VM is in OVA format If you haven’t done so already, download the PEcAn VM Split the OVA file into OVF and VMDK files tar xf <ovafile> 24.2.2 Set up an account on AWS After you have an account you need to set up a user and save your access key and secret key In my case I created a user named ‘carya’ Note: the key that ended up working had to be made at https://console.aws.amazon.com/iam/home#security_credential, not the link above. 24.2.3 Install EC2 command line tools wget http://s3.amazonaws.com/ec2-downloads/ec2-api-tools.zip sudo mkdir /usr/local/ec2 sudo unzip ec2-api-tools.zip -d /usr/local/ec2 If need be, download and install JDK export JAVA_HOME=$(/usr/libexec/java_home) export EC2_HOME=/usr/local/ec2/ec2-api-tools-<version> export PATH=$PATH:$EC2_HOME/bin Then set your user credentials as environment variables: export AWS_ACCESS_KEY=xxxxxxxxxxxxxx export AWS_SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxx Note: you may want to add all the variables set in the above EXPORT commands above into your .bashrc or equivalent. 24.2.4 Create an AWS S3 ‘bucket’ to upload VM to Go to https://console.aws.amazon.com/s3 and click “Create Bucket” In my case I named the bucket ‘pecan’ 24.2.5 Upload In the code below, make sure to change the PEcAn version, the name of the bucket, and the name of the region. Make sure that the PEcAn version matches the one you downloaded. Also, you may want to choose a considerably larger instance type. The one chosen below is that corresponding to the AWS Free Tier ec2-import-instance PEcAn_1.2.6-disk1.vmdk --instance-type t2.micro --format VMDK --architecture x86_64 --platform Linux --bucket pecan --region us-east-1 --owner-akid $AWS_ACCESS_KEY --owner-sak $AWS_SECRET_KEY Make sure to note the ID of the image since you’ll need it to check the VM status. Once the image is uploaded it will take a while (typically about an hour) for Amazon to convert the image to one it can run. You can check on this progress by running ec2-describe-conversion-tasks <image.ID> 24.2.6 Configuring the VM On the EC2 management webpage, https://console.aws.amazon.com/ec2, if you select Instances on the left hand side (LHS) you should be able to see your new PEcAn image as an option under Launch Instance. Before launching, you will want to update the firewall to open up additional ports that PEcAn needs – specifically port 80 for the webpage. Port 22 (ssh/sftp) should be open by default. Under “Security Groups” select “Inbound” then “Edit” and then add “HTTP”. Select “Elastic IPs” on the LHS, and “Allocate New Address” in order to create a public IP for your VM. Next, select “Network Interfaces” on the LHS and then under Actions select “Associate Addresses” then choose the Elastic IP you just created. See also http://docs.aws.amazon.com/AmazonVPC/latest/GettingStartedGuide/GetStarted.html 24.2.7 Set up multiple instances (optional) For info on setting up multiple instances with load balancing see: http://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/gs-ec2VPC.html Select “Load Balancers” on the LHS, click on “Create Load Balancer”, follow Wizard keeping defaults. To be able to launch multiple VMs: Under “Instances” convert VM to an Image. When done, select Launch, enable multiple instances, and associate with the previous security group. Once running, go back to “Load Balancers” and add the instances to the load balancer. Each instance can be accessed individually by it’s own public IP, but external users should access the system more generally via the Load Balancers DNS. 24.2.8 Booting the VM Return to “Instances” using the menu on the LHS. To boot the VM select “Actions” then “Instance State” then “Start”. In the future, once you have the VM loaded and configured this last step is the only one you will need to repeat to turn your VM on and off. The menu provided should specify the Public IP where the VM has launched "],["shiny-setup.html", "24.3 Shiny Setup", " 24.3 Shiny Setup Installing and configuring Shiny for PEcAn authors - Alexey Shiklomanov - Rob Kooper NOTE: Instructions are only tested for CentOS 6.5 and Ubuntu 16.04 NOTE: Pretty much every step here requires root access. 24.3.1 Install the Shiny R package and Shiny server Follow the instructions on the Shiny download page for the operating system you are using. 24.3.2 Modify the shiny configuration file The Shiny configuration file is located in /etc/shiny-server/shiny-server.conf. Comment out the entire file and add the following, replacing <username> with your user name and <location> with the URL location you want for your app. This will allow you to run Shiny apps from your web browser at https://your.server.edu/shiny/your-location run as shiny; server { listen 3838; location /<location>/ { run as <username>; site_dir /path/to/your/shiny/app; log_dir /var/log/shiny-server; directory_index on; } } For example, my configuration on the old test-pecan looks like this. run as shiny; server { listen 3838; location /ashiklom/ { run as ashiklom; site_dir /home/ashiklom/fs-data/pecan/shiny/; log_dir /var/log/shiny-server; directory_index on; } } …and I can access my Shiny apps at, for instance, https://test-pecan.bu.edu/shiny/ashiklom/workflowPlots. You can add as many location <loc> { ... } fields as you would like. run as shiny; server { listen 3838; location /ashiklom/ { ... } location /bety/ { ... } } If you change the configuration, for example to add a new location, you will need to restart Shiny server. If you are setting up a new instance of Shiny, skip this step and continue with the guide, since there are a few more steps to get Shiny working. If there is an instance of Shiny already running, you can restart it with: ## On CentOS sudo service shiny-server stop sudo service shiny-server start ## On Ubuntu sudo systemctl stop shiny-server.service sudo systemctl start shiny-server.service 24.3.3 Set the Apache proxy Create a file with the following name, based on the version of the operating system you are using: Ubuntu 16.04 (pecan1, pecan2, test-pecan) – /etc/apache2/conf-available/shiny.conf CentOS 6.5 (psql-pecan) – /etc/httpd/conf.d/shiny.conf Into this file, add the following: ProxyPass /shiny/ http://localhost:3838/ ProxyPassReverse /shiny/ http://localhost:3838/ RedirectMatch permanent ^/shiny$ /shiny/ 24.3.3.1 Ubuntu only: Enable the new shiny configuration sudo a2enconf shiny This will create a symbolic link to the newly created shiny.conf file inside the /etc/apache2/conf-enabled directory. You can do ls -l /etc/apache2/conf-enabled to confirm that this worked. 24.3.4 Enable and start the shiny server, and restart apache 24.3.4.1 On CentOS sudo ln -s /opt/shiny-server/config/init.d/redhat/shiny-server /etc/init.d sudo service shiny-server stop sudo service shiny-server start sudo service httpd restart You can check that Shiny is running with service shiny-server status. 24.3.4.2 On Ubuntu Enable the Shiny server service. This will make sure Shiny runs automatically on startup. sudo systemctl enable shiny-server.service Restart Apache. sudo apachectl restart Start the Shiny server. sudo systemctl start shiny-server.service If there are problems, you can stop the shiny-server.service with… sudo systemctl stop shiny-server.service …and then use start again to restart it. 24.3.5 Troubleshooting Refer to the log files for shiny (/var/log/shiny-server.log) and httpd (on CentOS, /var/log/httpd/error-log; on Ubuntu, /var/log/apache2/error-log). 24.3.6 Further reading Shiny server configuration reference "],["thredds-setup.html", "24.4 Thredds Setup", " 24.4 Thredds Setup Installing and configuring Thredds for PEcAn authors - Rob Kooper NOTE: Instructions are only tested for Ubuntu 16.04 on the VM, if you have instructions for CENTOS/RedHat please update this documentation NOTE: Pretty much every step here requires root access. 24.4.1 Install the Tomcat 8 and Thredds webapp The Tomcat 8 server can be installed from the default Ubuntu repositories. The thredds webapp will be downloaded and installed from unidata. First step is to install Tomcat 8 and configure it. The flag -Dtds.content.root.path should point to the location of where the thredds folder is located. This needs to be writeable by the user for tomcat. -Djava.security.egd is a special flag to use a different random number generator for tomcat. The default would take to long to generate a random number. apt-get -y install tomcat8 openjdk-8-jdk echo JAVA_OPTS=\\"-Dtds.content.root.path=/home/carya \\${JAVA_OPTS}\\" >> /etc/default/tomcat8 echo JAVA_OPTS=\\"-Djava.security.egd=file:/dev/./urandom \\${JAVA_OPTS}\\" >> /etc/default/tomcat8 service tomcat8 restart Next is to install the webapp. mkdir /home/carya/thredds chmod 777 /home/carya/thredds wget -O /var/lib/tomcat8/webapps/thredds.war ftp://ftp.unidata.ucar.edu/pub/thredds/4.6/current/thredds.war Finally we configure Apache to prox the thredds server cat > /etc/apache2/conf-available/thredds.conf << EOF ProxyPass /thredds/ http://localhost:8080/thredds/ ProxyPassReverse /thredds/ http://localhost:8080/thredds/ RedirectMatch permanent ^/thredds$ /thredds/ EOF a2enmod proxy_http a2enconf thredds service apache2 reload 24.4.1.1 Customize the Thredds server To customize the thredds server for your installation edit the file in /home/carya/thredds/threddsConfig.xml. For example the following file is included in the VM. <?xml version="1.0" encoding="UTF-8"?> <threddsConfig> <!-- all options are commented out in standard install - meaning use default values --> <!-- see http://www.unidata.ucar.edu/software/thredds/current/tds/reference/ThreddsConfigXMLFile.html --> <serverInformation> <name>PEcAn</name> <logoUrl>/pecan/images/pecan_small.jpg</logoUrl> <logoAltText>PEcAn</logoAltText> <abstract>Scientific Data</abstract> <keywords>meteorology, atmosphere, climate, ocean, earth science</keywords> <contact> <name>Rob Kooper</name> <organization>NCSA</organization> <email>kooper@illinois.edu</email> <!--phone></phone--> </contact> <hostInstitution> <name>PEcAn</name> <webSite>http://www.pecanproject.org/</webSite> <logoUrl>/pecan/images/pecan_small.jpg</logoUrl> <logoAltText>PEcAn Project</logoAltText> </hostInstitution> </serverInformation> <!-- The <catalogRoot> element: For catalogs you don't want visible from the /thredds/catalog.xml chain of catalogs, you can use catalogRoot elements. Each catalog root config catalog is crawled and used in configuring the TDS. <catalogRoot>myExtraCatalog.xml</catalogRoot> <catalogRoot>myOtherExtraCatalog.xml</catalogRoot> --> <!-- * Setup for generated HTML pages. * * NOTE: URLs may be absolute or relative, relative URLs must be relative * to the webapp URL, i.e., http://server:port/thredds/. --> <htmlSetup> <!-- * CSS documents used in generated HTML pages. * The CSS document given in the "catalogCssUrl" element is used for all pages * that are HTML catalog views. The CSS document given in the "standardCssUrl" * element is used in all other generated HTML pages. * --> <standardCssUrl>tds.css</standardCssUrl> <catalogCssUrl>tdsCat.css</catalogCssUrl> <openDapCssUrl>tdsDap.css</openDapCssUrl> <!-- * The Google Analytics Tracking code you would like to use for the * webpages associated with THREDDS. This will not track WMS or DAP * requests for data, only browsing the catalog. --> <googleTrackingCode></googleTrackingCode> </htmlSetup> <!-- The <TdsUpdateConfig> element controls if and how the TDS checks for updates. The default is for the TDS to check for the current stable and development release versions, and to log that information in the TDS serverStartup.log file as INFO entries. <TdsUpdateConfig> <logVersionInfo>true</logVersionInfo> </TdsUpdateConfig> --> <!-- The <CORS> element controls Cross-Origin Resource Sharing (CORS). CORS is a way to allow a website (such as THREDDS) to open up access to resources to web pages and applications running on a different domain. One example would be allowing a web-application to use fonts from a separate host. For TDS, this can allow a javascript app running on a different site to access data on a THREDDS server. For more information see: https://en.wikipedia.org/wiki/Cross-origin_resource_sharing The elements below represent defaults. Only the <enabled> tag is required to enable CORS. The default allowed origin is '*', which allows sharing to any domain. <CORS> <enabled>false</enabled> <maxAge>1728000</maxAge> <allowedMethods>GET</allowedMethods> <allowedHeaders>Authorization</allowedHeaders> <allowedOrigin>*</allowedOrigin> </CORS> --> <!-- The <CatalogServices> element: - Services on local TDS served catalogs are always on. - Services on remote catalogs are set with the allowRemote element below. They are off by default (recommended). --> <CatalogServices> <allowRemote>false</allowRemote> </CatalogServices> <!-- Configuring the CDM (netcdf-java library) see http://www.unidata.ucar.edu/software/netcdf-java/reference/RuntimeLoading.html <nj22Config> <ioServiceProvider class="edu.univ.ny.stuff.FooFiles"/> <coordSysBuilder convention="foo" class="test.Foo"/> <coordTransBuilder name="atmos_ln_sigma_coordinates" type="vertical" class="my.stuff.atmosSigmaLog"/> <typedDatasetFactory datatype="Point" class="gov.noaa.obscure.file.Flabulate"/> </nj22Config> --> <!-- CDM uses the DiskCache directory to store temporary files, like uncompressed files. <DiskCache> <alwaysUse>false</alwaysUse> <scour>1 hour</scour> <maxSize>1 Gb</maxSize> </DiskCache> --> <!-- Caching open NetcdfFile objects. default is to allow 50 - 100 open files, cleanup every 11 minutes <NetcdfFileCache> <minFiles>50</minFiles> <maxFiles>100</maxFiles> <scour>11 min</scour> </NetcdfFileCache> --> <!-- The <HTTPFileCache> element: allow 10 - 20 open datasets, cleanup every 17 minutes used by HTTP Range requests. <HTTPFileCache> <minFiles>10</minFiles> <maxFiles>20</maxFiles> <scour>17 min</scour> </HTTPFileCache> --> <!-- Writing GRIB indexes. <GribIndexing> <setExtendIndex>false</setExtendIndex> <alwaysUseCache>false</alwaysUseCache> </GribIndexing> --> <!-- Persist joinNew aggregations to named directory. scour every 24 hours, delete stuff older than 90 days <AggregationCache> <scour>24 hours</scour> <maxAge>90 days</maxAge> <cachePathPolicy>NestedDirectory</cachePathPolicy> </AggregationCache> --> <!-- How to choose the template dataset for an aggregation. latest, random, or penultimate <Aggregation> <typicalDataset>penultimate</typicalDataset> </Aggregation> --> <!-- The Netcdf Subset Service is off by default. <NetcdfSubsetService> <allow>false</allow> <scour>10 min</scour> <maxAge>-1 min</maxAge> </NetcdfSubsetService> --> <!-- <Opendap> <ascLimit>50</ascLimit> <binLimit>500</binLimit> <serverVersion>opendap/3.7</serverVersion> </Opendap> --> <!-- The WCS Service is off by default. Also, off by default (and encouraged) is operating on a remote dataset. <WCS> <allow>false</allow> <allowRemote>false</allowRemote> <scour>15 min</scour> <maxAge>30 min</maxAge> </WCS> --> <!-- <WMS> <allow>false</allow> <allowRemote>false</allowRemote> <maxImageWidth>2048</maxImageWidth> <maxImageHeight>2048</maxImageHeight> </WMS> --> <!-- <NCISO> <ncmlAllow>false</ncmlAllow> <uddcAllow>false</uddcAllow> <isoAllow>false</isoAllow> </NCISO> --> <!-- CatalogGen service is off by default. <CatalogGen> <allow>false</allow> </CatalogGen> --> <!-- DLwriter service is off by default. As is support for operating on remote catalogs. <DLwriter> <allow>false</allow> <allowRemote>false</allowRemote> </DLwriter> --> <!-- DqcService is off by default. <DqcService> <allow>false</allow> </DqcService> --> <!-- Link to a Viewer application on the HTML page: <Viewer>my.package.MyViewer</Viewer> --> <!-- Add a DataSource - essentially an IOSP with access to Servlet request parameters <datasetSource>my.package.DatsetSourceImpl</datasetSource> --> <!-- set FeatureCollection logging <FeatureCollection> <RollingFileAppender> <MaxFileSize>1 MB</MaxFileSize> <MaxBackups>5</MaxBackups> <Level>INFO</Level> </RollingFileAppender> </FeatureCollection> --> <!-- Configure how the NetCDF-4 C library is discovered and used. libraryPath: The directory in which the native library is installed. libraryName: The name of the native library. This will be used to locate the proper .DLL, .SO, or .DYLIB file within the libraryPath directory. useForReading: By default, the native library is only used for writing NetCDF-4 files; a pure-Java layer is responsible for reading them. However, if this property is set to true, then it will be used for reading NetCDF-4 (and HDF5) files as well. --> <!-- <Netcdf4Clibrary> <libraryPath>/usr/local/lib</libraryPath> <libraryName>netcdf</libraryName> <useForReading>false</useForReading> </Netcdf4Clibrary> --> </threddsConfig> 24.4.2 Update the catalog For example to update the catalog with the latest data, run the following command from the root crontab. This cronjob will also synchronize the database with remote servers and dump your database (by default in /home/carya/dump) 0 * * * * /home/carya/pecan/scripts/cron.sh -o /home/carya/dump 24.4.3 Troubleshooting Refer to the log files for Tomcat (/var/log/tomcat8/*) and Thredds (/home/carya/thredds/logs). 24.4.4 Further reading Thredds reference "],["osinstall.html", "24.5 OS Specific Installations", " 24.5 OS Specific Installations Ubuntu CentOS OSX Install BETY THIS PAGE IS DEPRECATED Install Models Install Data 24.5.1 Ubuntu These are specific notes for installing PEcAn on Ubuntu (14.04) and will be referenced from the main installing PEcAn page. You will at least need to install the build environment and Postgres sections. If you want to access the database/PEcAn using a web browser you will need to install Apache. To access the database using the BETY interface, you will need to have Ruby installed. This document also contains information on how to install the Rstudio server edition as well as any other packages that can be helpful. 24.5.1.1 Install build environment sudo -s # point to latest R echo "deb http://cran.rstudio.com/bin/linux/ubuntu `lsb_release -s -c`/" > /etc/apt/sources.list.d/R.list apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E084DAB9 # update package list apt-get -y update # install packages needed for PEcAn apt-get -y install build-essential gfortran git r-base-core r-base-dev jags liblapack-dev libnetcdf-dev netcdf-bin bc libcurl4-gnutls-dev curl udunits-bin libudunits2-dev libgmp-dev python-dev libgdal1-dev libproj-dev expect # install packages needed for ED2 apt-get -y install openmpi-bin libopenmpi-dev # install requirements for DALEC apt-get -y install libgsl0-dev # install packages for webserver apt-get -y install apache2 libapache2-mod-php5 php5 # install packages to compile docs apt-get -y install texinfo texlive-latex-base texlive-latex-extra texlive-fonts-recommended # install devtools echo 'install.packages("devtools")' | R --vanilla # done as root exit 24.5.1.2 Install Postgres Documentation: http://trac.osgeo.org/postgis/wiki/UsersWikiPostGIS21UbuntuPGSQL93Apt sudo -s # point to latest PostgreSQL echo "deb http://apt.postgresql.org/pub/repos/apt `lsb_release -s -c`-pgdg main" > /etc/apt/sources.list.d/pgdg.list wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - # update package list apt-get -y update # install packages for postgresql (using a newer version than default) apt-get -y install libdbd-pgsql postgresql postgresql-client libpq-dev postgresql-9.4-postgis-2.1 postgresql-9.4-postgis-2.1-scripts # install following if you want to run pecan through the web apt-get -y install php5-pgsql # enable bety user to login with trust by adding the following lines after # the ability of postgres user to login in /etc/postgresql/9.4/main/pg_hba.conf local all bety trust host all bety 127.0.0.1/32 trust host all bety ::1/128 trust # Once done restart postgresql /etc/init.d/postgresql restart exit To install the BETYdb database .. 24.5.1.3 Apache Configuration PEcAn # become root sudo -s # get index page rm /var/www/html/index.html ln -s ${HOME}/pecan/documentation/index_vm.html /var/www/html/index.html # setup a redirect cat > /etc/apache2/conf-available/pecan.conf << EOF Alias /pecan ${HOME}/pecan/web <Directory ${HOME}/pecan/web> DirectoryIndex index.php Options +ExecCGI Require all granted </Directory> EOF a2enconf pecan /etc/init.d/apache2 restart # done as root exit 24.5.1.4 Apache Configuration BETY sudo -s # install all ruby related packages apt-get -y install ruby2.0 ruby2.0-dev libapache2-mod-passenger # link static content ln -s ${HOME}/bety/public /var/www/html/bety # setup a redirect cat > /etc/apache2/conf-available/bety.conf << EOF RailsEnv production RailsBaseURI /bety PassengerRuby /usr/bin/ruby2.0 <Directory /var/www/html/bety> Options +FollowSymLinks Require all granted </Directory> EOF a2enconf bety /etc/init.d/apache2 restart 24.5.1.5 Rstudio-server NOTE This will allow anybody to login to the machine through the rstudio interface and run any arbitrary code. The login used however is the same as the system login/password. wget http://download2.rstudio.org/rstudio-server-0.98.1103-amd64.deb # bceome root sudo -s # install required packages apt-get -y install libapparmor1 apparmor-utils libssl0.9.8 # install rstudio dpkg -i rstudio-server-* rm rstudio-server-* echo "www-address=127.0.0.1" >> /etc/rstudio/rserver.conf echo "r-libs-user=~/R/library" >> /etc/rstudio/rsession.conf rstudio-server restart # setup rstudio forwarding in apache a2enmod proxy_http cat > /etc/apache2/conf-available/rstudio.conf << EOF ProxyPass /rstudio/ http://localhost:8787/ ProxyPassReverse /rstudio/ http://localhost:8787/ RedirectMatch permanent ^/rstudio$ /rstudio/ EOF a2enconf rstudio /etc/init.d/apache2 restart # all done, exit root exit 24.5.1.6 Additional packages HDF5 Tools, netcdf, GDB and emacs sudo apt-get -y install hdf5-tools cdo nco netcdf-bin ncview gdb emacs ess nedit 24.5.2 CentOS/RedHat These are specific notes for installing PEcAn on CentOS (7) and will be referenced from the main installing PEcAn page. You will at least need to install the build environment and Postgres sections. If you want to access the database/PEcAn using a web browser you will need to install Apache. To access the database using the BETY interface, you will need to have Ruby installed. This document also contains information on how to install the Rstudio server edition as well as any other packages that can be helpful. 24.5.2.1 Install build environment sudo -s # install packages needed for PEcAn yum -y groupinstall 'Development Tools' yum -y install git netcdf-fortran-openmpi-devel R bc curl libxml2-devel openssl-devel ed udunits2 udunits2-devel netcdf netcdf-devel gmp-devel python-devel gdal-devel proj-devel proj-epsg expect # jags yum -y install http://download.opensuse.org/repositories/home:/cornell_vrdc/CentOS_7/x86_64/jags3-3.4.0-54.1.x86_64.rpm yum -y install http://download.opensuse.org/repositories/home:/cornell_vrdc/CentOS_7/x86_64/jags3-devel-3.4.0-54.1.x86_64.rpm # fix include folder for udunits2 ln -s /usr/include/udunits2/* /usr/include/ # install packages needed for ED2 yum -y install environment-modules openmpi-bin libopenmpi-dev # install requirements for DALEC yum -y install gsl-devel # install packages for webserver yum -y install httpd php systemctl enable httpd systemctl start httpd firewall-cmd --zone=public --add-port=80/tcp --permanent firewall-cmd --reload # install packages to compile docs #apt-get -y install texinfo texlive-latex-base texlive-latex-extra texlive-fonts-recommended # install devtools echo 'install.packages("devtools")' | R --vanilla # done as root exit echo "module load mpi" >> ~/.bashrc module load mpi 24.5.2.2 Install and configure PostgreSQL, udunits2, NetCDF sudo -s # point to latest PostgreSQL yum install -y epel-release yum -y install http://yum.postgresql.org/9.4/redhat/rhel-7-x86_64/pgdg-centos94-9.4-1.noarch.rpm # install packages for postgresql (using a newer version than default) yum -y install postgresql94-server postgresql94-contrib postgis2_94 postgresql94-devel udunits2 netcdf # install following if you want to run pecan through the web yum -y install php-pgsql # enable bety user to login with trust by adding the following lines after # the ability of postgres user to login in /var/lib/pgsql/9.4/data/pg_hba.conf local all bety trust host all bety 127.0.0.1/32 trust host all bety ::1/128 trust # Create database /usr/pgsql-9.4/bin/postgresql94-setup initdb # Enable postgres systemctl enable postgresql-9.4 systemctl start postgresql-9.4 exit 24.5.2.3 Apache Configuration PEcAn Install and Start Apache yum -y install httpd systemctl enable httpd systemctl start httpd # become root sudo -s # get index page rm /var/www/html/index.html ln -s /home/carya/pecan/documentation/index_vm.html /var/www/html/index.html # fix selinux context (does this need to be done after PEcAn is installed?) chcon -R -t httpd_sys_content_t /home/carya/pecan /home/carya/output # setup a redirect cat > /etc/httpd/conf.d/pecan.conf << EOF Alias /pecan /home/carya/pecan/web <Directory /home/carya/pecan/web> DirectoryIndex index.php Options +ExecCGI Require all granted </Directory> EOF a2enconf pecan /etc/init.d/apache2 restart # done as root exit 24.5.2.4 Apache Configuration BETY sudo -s # install all ruby related packages sudo curl --fail -sSLo /etc/yum.repos.d/passenger.repo https://oss-binaries.phusionpassenger.com/yum/definitions/el-passenger.repo yum -y install ruby ruby-devel mod_passenger # link static content ln -s /home/carya/bety/public /var/www/html/bety # fix GemFile echo 'gem "test-unit"' >> bety/Gemlile # fix selinux context (does this need to be done after bety is installed?) chcon -R -t httpd_sys_content_t /home/carya/bety # setup a redirect cat > /etc/httpd/conf.d/bety.conf << EOF RailsEnv production RailsBaseURI /bety PassengerRuby /usr/bin/ruby <Directory /var/www/html/bety> Options +FollowSymLinks Require all granted </Directory> EOF systemctl restart httpd 24.5.2.5 Rstudio-server NEED FIXING NOTE This will allow anybody to login to the machine through the rstudio interface and run any arbitrary code. The login used however is the same as the system login/password. wget http://download2.rstudio.org/rstudio-server-0.98.1103-amd64.deb # bceome root sudo -s # install required packages apt-get -y install libapparmor1 apparmor-utils libssl0.9.8 # install rstudio dpkg -i rstudio-server-* rm rstudio-server-* echo "www-address=127.0.0.1" >> /etc/rstudio/rserver.conf echo "r-libs-user=~/R/library" >> /etc/rstudio/rsession.conf rstudio-server restart # setup rstudio forwarding in apache a2enmod proxy_http cat > /etc/apache2/conf-available/rstudio.conf << EOF ProxyPass /rstudio/ http://localhost:8787/ ProxyPassReverse /rstudio/ http://localhost:8787/ RedirectMatch permanent ^/rstudio$ /rstudio/ EOF a2enconf rstudio /etc/init.d/apache2 restart # all done, exit root exit Alternative Rstudio instructions 24.5.2.5.1 Install and configure Rstudio-server Install RStudio Server by following the official documentation for your platform. Then, proceed with the following: add PATH=$PATH:/usr/sbin:/sbin to /etc/profile cat "PATH=$PATH:/usr/sbin:/sbin; export PATH" >> /etc/profile add rstudio.conf to /etc/httpd/conf.d/ wget https://gist.github.com/dlebauer/6921889/raw/d1e0f945228e5519afa6223d6f49d6e0617262bd/rstudio.conf sudo mv rstudio.conf /httpd/conf.d/ restart the Apache server: sudo httpd restart now you should be able to access http://<server>/rstudio 24.5.2.6 Install ruby-netcdf gem cd $RUBY_APPLICATION_HOME export $NETCDF_URL=http://www.gfd-dennou.org/arch/ruby/products/ruby-netcdf/release/ruby-netcdf-0.6.6.tar.gz export $NETCDF_DIR=/usr/local/netcdf gem install narray export NARRAY_DIR="$(ls $GEM_HOME/gems | grep 'narray-')" export NARRAY_PATH="$GEM_HOME/gems/$NARRAY_DIR" cd $MY_RUBY_HOME/bin wget $NETCDF_URL -O ruby-netcdf.tgz tar zxf ruby-netcdf.tgz && cd ruby-netcdf-0.6.6/ ruby -rubygems extconf.rb --with-narray-include=$NARRAY_PATH --with-netcdf-dir=/usr/local/netcdf-4.3.0 sed -i 's|rb/$|rb|' Makefile make make install cd ../ && sudo rm -rf ruby-netcdf* cd $RUBY_APPLICATION bundle install --without development 24.5.2.7 Additional packages NEED FIXING HDF5 Tools, netcdf, GDB and emacs sudo apt-get -y install hdf5-tools cdo nco netcdf-bin ncview gdb emacs ess nedit 24.5.3 Mac OSX These are specific notes for installing PEcAn on Mac OSX and will be referenced from the main installing PEcAn page. You will at least need to install the build environment and Postgres sections. If you want to access the database/PEcAn using a web browser you will need to install Apache. To access the database using the BETY interface, you will need to have Ruby installed. This document also contains information on how to install the Rstudio server edition as well as any other packages that can be helpful. 24.5.3.1 Install build environment # install R # download from http://cran.r-project.org/bin/macosx/ # install gfortran # download from http://cran.r-project.org/bin/macosx/tools/ # install OpenMPI curl -o openmpi-1.6.3.tar.gz http://www.open-mpi.org/software/ompi/v1.6/downloads/openmpi-1.6.3.tar.gz tar zxf openmpi-1.6.3.tar.gz cd openmpi-1.6.3 ./configure --prefix=/usr/local make all sudo make install cd .. # install szip curl -o szip-2.1-MacOSX-intel.tar.gz ftp://ftp.hdfgroup.org/lib-external/szip/2.1/bin/szip-2.1-MacOSX-intel.tar.gz tar zxf szip-2.1-MacOSX-intel.tar.gz sudo mv szip-2.1-MacOSX-intel /usr/local/szip # install HDF5 curl -o hdf5-1.8.11.tar.gz http://www.hdfgroup.org/ftp/HDF5/current/src/hdf5-1.8.11.tar.gz tar zxf hdf5-1.8.11.tar.gz cd hdf5-1.8.11 sed -i -e 's/-O3/-O0/g' config/gnu-flags ./configure --prefix=/usr/local/hdf5 --enable-fortran --enable-cxx --with-szlib=/usr/local/szip make # make check sudo make install # sudo make check-install cd .. 24.5.3.2 Install Postgres For those on a Mac I use the following app for postgresql which has postgis already installed (http://postgresapp.com/) To get postgis run the following commands in psql: ##### Enable PostGIS (includes raster) CREATE EXTENSION postgis; ##### Enable Topology CREATE EXTENSION postgis_topology; ##### fuzzy matching needed for Tiger CREATE EXTENSION fuzzystrmatch; ##### Enable US Tiger Geocoder CREATE EXTENSION postgis_tiger_geocoder; To check your postgis run the following command again in psql: SELECT PostGIS_full_version(); 24.5.3.3 Additional installs 24.5.3.3.1 Install JAGS Download JAGS from http://sourceforge.net/projects/mcmc-jags/files/JAGS/3.x/Mac%20OS%20X/JAGS-Mavericks-3.4.0.dmg/download 24.5.3.3.2 Install udunits Installing udunits-2 on MacOSX is done from source. download most recent version of Udunits here instructions for compiling from source curl -o udunits-2.1.24.tar.gz ftp://ftp.unidata.ucar.edu/pub/udunits/udunits-2.1.24.tar.gz tar zxf udunits-2.1.24.tar.gz cd udunits-2.1.24 ./configure make sudo make install 24.5.3.4 Apache Configuration Mac does not support pdo/postgresql by default. The easiest way to install is use: http://php-osx.liip.ch/ To enable pecan to run from your webserver. cat > /etc/apache2/others/pecan.conf << EOF Alias /pecan ${PWD}/pecan/web <Directory ${PWD}/pecan/web> DirectoryIndex index.php Options +All Require all granted </Directory> EOF 24.5.3.5 Ruby The default version of ruby should work. Or use JewelryBox. 24.5.3.6 Rstudio Server For the mac you can download Rstudio Desktop. 24.5.4 Installing BETY **************THIS PAGE IS DEPRECATED************* Official Instructions for BETY are maintained here: https://pecan.gitbook.io/betydb-documentation If you would like to install the Docker Version of BETY, please consult the PEcAn Docker section. 24.5.4.1 Install Database + Data note To install BETYdb without PEcAn, first download the load.bety.sh script # install database (code assumes password is bety) sudo -u postgres createuser -d -l -P -R -S bety sudo -u postgres createdb -O bety bety sudo -u postgres ./scripts/load.bety.sh -c YES -u YES -r 0 sudo -u postgres ./scripts/load.bety.sh -r 1 sudo -u postgres ./scripts/load.bety.sh -r 2 # configure for PEcAn web app (change password if needed) cp web/config.example.php web/config.php # add models to database (VM only) ./scripts/add.models.sh # add data to database ./scripts/add.data.sh # create outputs folder mkdir ~/output chmod 777 ~/output 24.5.4.2 Installing BETYdb Web Application There are two flavors of BETY, PHP and RUBY. The PHP version allows for a minimal interaction with the database while the RUBY version allows for full interaction with the database. 24.5.4.2.1 PHP version The php version comes with PEcAn and is already configured. 24.5.4.2.2 RUBY version The RUBY version requires a few extra packages to be installed first. Next we install the web app. # install bety cd git clone https://github.com/PecanProject/bety.git # install gems cd bety sudo gem2.0 install bundler bundle install --without development:test:javascript_testing:debug and configure BETY # create folders for upload folders mkdir paperclip/files paperclip/file_names chmod 777 paperclip/files paperclip/file_names # create folder for log files mkdir log touch log/production.log chmod 0666 log/production.log # fix configuration for vm cp config/additional_environment_vm.rb config/additional_environment.rb chmod go+w public/javascripts/cache/ # setup bety database configuration cat > config/database.yml << EOF production: adapter: postgis encoding: utf-8 reconnect: false database: bety pool: 5 username: bety password: bety EOF # setup login tokens cat > config/initializers/site_keys.rb << EOF REST_AUTH_SITE_KEY = 'thisisnotasecret' REST_AUTH_DIGEST_STRETCHES = 10 EOF 24.5.5 Install Models This page contains instructions on how to download and install ecosystem models that have been or are being coupled to PEcAn. These instructions have been tested on the PEcAn unbuntu VM. Commands may vary on other operating systems. Also, some model downloads require permissions before downloading, making them unavailable to the general public. Please contact the PEcAn team if you would like access to a model that is not already installed on the default PEcAn VM. BioCro CLM 4.5 DALEC ED2 FATES GDAY JULES LINKAGES LPJ-GUESS MAESPA SIPNET 24.5.5.1 BioCro # Version 0.951 echo 'devtools::install_github("ebimodeling/biocro@0.951")' | R --vanilla # Version 1.x echo 'devtools::install_github("ebimodeling/biocro-dev@26e0125b")' | R --vanilla BioCro Developers: request access from @dlebauer on GitHub 24.5.5.2 CLM 4.5 The version of CLM installed on PEcAn is the ORNL branch provided by Dan Ricciuto. This version includes Dan’s point-level CLM processing scripts Download the code (~300M compressed), input data (1.7GB compressed and expands to 14 GB), and a few misc inputs. mkdir models cd models wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm4_5_1_r085.tar.gz wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm/ccsm_inputdata.tar.gz tar -xvzf clm4_5* tar -xvzf ccsm_inputdata.tar.gz #Parameter file: mkdir /home/carya/models/ccsm_inputdata/lnd/clm2/paramdata cd /home/carya/models/ccsm_inputdata/lnd/clm2/paramdata wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm_params.c130821.nc wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm_params.c140423.nc #Domain file: cd /home/carya/models/ccsm_inputdata/share/domains/domain.clm/ wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/domain.lnd.1x1pt_US-UMB_navy.nc #Aggregated met data file: cd /home/carya/models/ccsm_inputdata/atm/datm7/CLM1PT_data/1x1pt_US-UMB wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/all_hourly.nc ## lightning database cd /home/carya/models/ccsm_inputdata/atm/datm7/NASA_LIS/ wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clmforc.Li_2012_climo1995-2011.T62.lnfm_Total_c140423.nc ## surface data cd /home/carya/models/ccsm_inputdata/lnd/clm2/surfdata wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm/surfdata_360x720cru_simyr1850_c130927.nc cd /home/carya/models/ccsm_inputdata/lnd/clm2/surfdata_map wget ftp://nacp.ornl.gov/synthesis/2008/firenze/site/clm/surfdata_1x1pt_US-UMB_I1850CLM45CN_simyr1850.nc_new mv surfdata_1x1pt_US-UMB_I1850CLM45CN_simyr1850.nc_new surfdata_1x1pt_US-UMB_I1850CLM45CN_simyr1850.nc Required libraries sudo apt-get install mercurial csh tcsh subversion cmake sudo ln -s /usr/bin/make /usr/bin/gmake Compile and build default inputs cd ~/carya/models/clm4_5_1_r085/scripts python runCLM.py --site US-UMB ––compset I1850CLM45CN --mach ubuntu --ccsm_input /home/carya/models/ccsm_inputdata --tstep 1 --nopointdata --coldstart --cpl_bypass --clean_build 24.5.5.2.1 CLM Test Run You will see a new directory in scripts: US-UMB_I1850CLM45CN Enter this directory and run (you shouldn’t have to do this normally, but there is a bug with the python script and doing this ensures all files get to the right place): ./US-UMB_I1850CLM45CN.build Next you are ready to go to the run directory: /home/carya/models/clm4_5_1_r085/run/US-UMB_I1850CLM45CN/run Open to edit file: datm.streams.txt.CLM1PT.CLM_USRDAT and check file paths such that all paths start with /home/carya/models/ccsm_inputdata From this directory, launch the executable that resides in the bld directory: /home/carya/clm4_5_1_r085/run/US-UMB_I1850CLM45CN/bld/cesm.exe not sure this was the right location, but wherever the executable is You should begin to see output files that look like this: US-UMB_I1850CLM45CN.clm2.h0.yyyy-mm.nc (yyyy is year, mm is month) These are netcdf files containing monthly averages of lots of variables. The lnd_in file in the run directory can be modified to change the output file frequency and variables. 24.5.5.3 DALEC cd curl -o dalec_EnKF_pub.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/dalec_EnKF_pub.tgz tar zxf dalec_EnKF_pub.tgz rm dalec_EnKF_pub.tgz cd dalec_EnKF_pub make dalec_EnKF make dalec_seqMH sudo cp dalec_EnKF dalec_seqMH /usr/local/bin 24.5.5.4 ED2 24.5.5.4.1 ED2.2 r46 (used in PEcAn manuscript) # ---------------------------------------------------------------------- # Get version r46 with a few patches for ubuntu cd curl -o ED.r46.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r46.tgz tar zxf ED.r46.tgz rm ED.r46.tgz # ---------------------------------------------------------------------- # configure and compile ed cd ~/ED.r46/ED/build/bin curl -o include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.`uname -s` make OPT=VM sudo cp ../ed_2.1-VM /usr/local/bin/ed2.r46 Perform a test run using pre configured ED settings for ED2.2 r46 # ---------------------------------------------------------------------- # Create sample run cd mkdir testrun.ed.r46 cd testrun.ed.r46 curl -o ED2IN http://isda.ncsa.illinois.edu/~kooper/EBI/ED2IN.r46 sed -i -e "s#\\$HOME#$HOME#" ED2IN curl -o config.xml http://isda.ncsa.illinois.edu/~kooper/EBI/config.r46.xml # execute test run time ed2.r46 24.5.5.4.2 ED 2.2 r82 cd curl -o ED.r82.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r82.tgz tar zxf ED.r82.tgz rm ED.r82.tgz cd ED.r82 curl -o ED.r82.patch http://isda.ncsa.illinois.edu/~kooper/EBI/ED.r82.patch patch -p1 < ED.r82.patch cd ED/build/bin curl -o include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.`uname -s` make OPT=VM sudo cp ../ed_2.1-VM /usr/local/bin/ed2.r82 Perform a test run using pre configured ED settings for ED2.2 r82 cd mkdir testrun.ed.r82 cd testrun.ed.r82 curl -o ED2IN http://isda.ncsa.illinois.edu/~kooper/EBI/ED2IN.r82 sed -i -e "s#\\$HOME#$HOME#" ED2IN curl -o config.xml http://isda.ncsa.illinois.edu/~kooper/EBI/config.r82.xml # execute test run time ed2.r82 24.5.5.4.3 ED 2.2 bleeding edge cd git clone https://github.com/EDmodel/ED2.git cd ED2/ED/build/bin curl -o include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.`uname -s` ./generate_deps.sh make OPT=VM sudo cp ../ed_2.1-VM /usr/local/bin/ed2.git 24.5.5.5 CLM-FATES Prerequisites sudo apt-get upgrade libnetcdf-dev sudo apt-get install subversion sudo apt-get install csh sudo apt-get install cmake sudo ln -s /usr/bin/make /usr/bin/gmake sudo rm /bin/sh sudo ln -s /bin/bash /bin/sh wget https://github.com/Unidata/netcdf-fortran/archive/v4.4.4.tar.gz cd netcdf-4.4.4 ./configure make sudo make install you might need to mess around with installing netcdf and netcdf-fortran to get a version FATES likes… Get code from Github (currently private) and go to cime/scripts directory git clone git@github.com:NGEET/ed-clm.git cd ed-clm/cime/scripts/ Within CLM-FATES, to be able to build an executable we need to create a reference run. We’ll also use this reference run to grab defaults from, so we’ll be registering the location of both the reference case (location of executable, scripts, etc) and the reference inputs with the PEcAn database. To begin, copy reference run script from pecan cp ~/pecan/models/fates/inst/create_1x1_ref_case.sh . Edit reference case script to set NETCDF_HOME, CROOT (reference run case), DIN_LOC_ROOT (reference run inputs). Also, make sure DIN_LOC_ROOT exists as FATES will not create it itself. Then run the script ./create_1x1_ref_case.sh Be aware that this script WILL ask you for your password on the NCAR server to download the reference case input data (the guest password may work, haven’t tried this). If it gives an error at the pio stage check the log, but the most likely error is it being unable to find a version of netcdf it likes. Once FATES is installed, set the whole reference case directory as the Model path (leave filename blank) and set the whole inputs directory as an Input with format clm_defaults. 24.5.5.6 GDAY Navigate to a directory you would like to store GDAY and run the following: git clone https://github.com/mdekauwe/GDAY.git cd GDAY cd src make gday is your executable. 24.5.5.7 JULES INSTALL STEPS: 1) Download JULES and FCM JULES: Model requires registration to download. Not to be put on PEcAn VM Getting Started documentation: https://jules.jchmr.org/content/getting-started Registration: http://jules-lsm.github.io/access_req/JULES_access.html FCM: https://github.com/metomi/fcm/ wget https://github.com/metomi/fcm/archive/2015.05.0.tar.gz edit makefile open etc/fcm-make/make.cfg set JULES_NETCDF = actual instead of dummy set path (e.g. /usr/) and lib_path /lib64 to netCDF libraries compile JULES cd etc/fcm-make/ {path.to.fcm}/fcm make -f etc/fcm-make/make.cfg --new UBUNTU VERSION: installed without having to add any perl libraries #perl stuff that I had to install on pecan2 not PEcAN VM sudo yum install perl-Digest-SHA sudo yum install perl-Time-modules sudo yum install cpan curl -L http://cpanmin.us | perl - --sudo App::cpanminus sudo cpanm Time/Piece.pm sudo cpanm IO/Uncompress/Gunzip.pm Executable is under build/bin/jules.exe Example rundir: examples/point_loobos 24.5.5.8 LINKAGES 24.5.5.8.1 R Installation # Public echo 'devtools::install_github("araiho/linkages_package")' | R --vanilla 24.5.5.8.2 FORTRAN VERSION #FORTRAN VERSION cd git clone https://github.com/araiho/Linkages.git cd Linkages gfortran -o linkages linkages.f sudo cp linkages /usr/local/bin/linkages.git 24.5.5.9 LPJ-GUESS Instructions to download source code Go to LPJ-GUESS website for instructions to access code. 24.5.5.10 MAESPA Navigate to a directory you would like store MAESPA and run the following: git clone https://bitbucket.org/remkoduursma/maespa.git cd maespa make maespa.out is your executable. Example input files can be found in the inpufiles directory. Executing measpa.out from within one of the example directories will produce output. MAESPA developers have also developed a wrapper package called Maeswrap. The usual R package installation method install.packages may present issues with downloading an unpacking a dependency package called rgl. Here are a couple of solutions: 24.5.5.10.1 Solution 1 ### From the Command Line sudo apt-get install r-cran-rgl then from within R install.packages("Maeswrap") 24.5.5.10.2 Solution 2 ### From the Command line sudo apt-get install libglu1-mesa-dev then from within R install.packages("Maeswrap") 24.5.5.11 SIPNET {inst-sipnet} cd curl -o sipnet_unk.tar.gz http://isda.ncsa.illinois.edu/~kooper/EBI/sipnet_unk.tar.gz tar zxf sipnet_unk.tar.gz rm sipnet_unk.tar.gz cd sipnet_unk make sudo cp sipnet /usr/local/bin/sipnet.runk 24.5.5.11.1 SIPNET testrun cd curl -o testrun.sipnet.tar.gz http://isda.ncsa.illinois.edu/~kooper/EBI/testrun.sipnet.tar.gz tar zxf testrun.sipnet.tar.gz rm testrun.sipnet.tar.gz cd testrun.sipnet sipnet.runk 24.5.6 Installing data for PEcAn PEcAn assumes some of the data to be installed on the machine. This page will describe how to install this data. 24.5.6.1 Site Information These are large-ish files that contain data used with ED2 and SIPNET rm -rf sites curl -o sites.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/sites.tgz tar zxf sites.tgz sed -i -e "s#/home/kooper/Projects/EBI#${PWD}#" sites/*/ED_MET_DRIVER_HEADER rm sites.tgz rm -rf inputs curl -o inputs.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/inputs.tgz tar zxf inputs.tgz rm inputs.tgz 24.5.6.2 FIA database FIA database is large and will add an extra 10GB to the installation. # download and install database curl -o fia5data.psql.gz http://isda.ncsa.illinois.edu/~kooper/EBI/fia5data.psql.gz dropdb --if-exists fia5data createdb -O bety fia5data gunzip fia5data.psql.gz psql -U bety -d fia5data < fia5data.psql rm fia5data.psql 24.5.6.3 Flux Camp Following will install the data for flux camp (as well as the demo script for PEcAn). cd curl -o plot.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/plot.tgz tar zxf plot.tgz rm plot.tgz 24.5.6.4 Harvard for ED tutorial Add datasets and runs curl -o Santarem_Km83.zip http://isda.ncsa.illinois.edu/~kooper/EBI/Santarem_Km83.zip unzip -d sites Santarem_Km83.zip sed -i -e "s#/home/pecan#${HOME}#" sites/Santarem_Km83/ED_MET_DRIVER_HEADER rm Santarem_Km83.zip curl -o testrun.s83.zip http://isda.ncsa.illinois.edu/~kooper/EBI/testrun.s83.zip unzip testrun.s83.zip sed -i -e "s#/home/pecan#${HOME}#" testrun.s83/ED2IN rm testrun.s83.zip curl -o ed2ws.harvard.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/ed2ws.harvard.tgz tar zxf ed2ws.harvard.tgz mkdir ed2ws.harvard/analy ed2ws.harvard/histo sed -i -e "s#/home/pecan#${HOME}#g" ed2ws.harvard/input_harvard/met_driver/HF_MET_HEADER ed2ws.harvard/ED2IN ed2ws.harvard/*.r rm ed2ws.harvard.tgz curl -o testrun.PDG.zip http://isda.ncsa.illinois.edu/~kooper/EBI/testrun.PDG.zip unzip testrun.PDG.zip sed -i -e "s#/home/pecan#${HOME}#" testrun.PDG/Met/PDG_MET_DRIVER testrun.PDG/Template/ED2IN sed -i -e 's#/n/scratch2/moorcroft_lab/kzhang/PDG/WFire_Pecan/##' testrun.PDG/Template/ED2IN rm testrun.PDG.zip curl -o create_met_driver.tar.gz http://isda.ncsa.illinois.edu/~kooper/EBI/create_met_driver.tar.gz tar zxf create_met_driver.tar.gz rm create_met_driver.tar.gz "],["docker-index.html", "25 Docker", " 25 Docker This chapter describes the PEcAn Docker container infrastructure. It contains the following sections: Introduction to Docker – Brief introduction to Docker and docker-compose Docker quickstart – Brief tutorial for setting up a Docker-based PEcAn instance PEcAn Docker Architecture – Detailed description of the containers comprising the PEcAn Docker-based infrastructure Dockerfiles for models – General guide for writing Dockerfiles for new models Building and modifying images Troubleshooting Docker Migrating from VM to Docker – Steps to migrate from running PEcAn on a VM to a docker. "],["docker-intro.html", "25.1 Introduction to Docker?", " 25.1 Introduction to Docker? What is Docker Working with Docker docker-compose 25.1.1 What is Docker? For a quick and accessible introduction to Docker, we suggest this YouTube video: Learn Docker in 12 Minutes. For more comprehensive Docker documentation, we refer you to the Docker documentation website. For a useful analogy for Docker containerization, we refer you to the webcomic xkcd. Docker is a technology for encapsulating software in “containers”, somewhat similarly to virtual machines. Like virtual machines, Docker containers facilitate software distribution by bundling the software with all of its dependencies in a single location. Unlike virtual machines, Docker containers are meant to only run a single service or process and are build on top of existing services provided by the host OS (such as disk access, networking, memory management etc.). In Docker, an image refers to a binary snapshot of a piece of software and all of its dependencies. A container refers to a running instance of a particular image. A good rule of thumb is that each container should be responsible for no more than one running process. A software stack refers to a collection of containers, each responsible for its own process, working together to power a particular application. Docker makes it easy to run multiple software stacks at the same time in parallel on the same machine. Stacks can be given a unique name, which is passed along as a prefix to all their containers. Inside these stacks, containers can communicate using generic names not prefixed with the stack name, making it easy to deploy multiple stacks with the same internal configuration. Containers within the same stack communicate with each other via a common network. Like virtual machines or system processes, Docker stacks can also be instructed to open specific ports to facilitate communication with the host and other machines. The PEcAn database BETY provides an instructive case-study. BETY is comprised of two core processes – a PostgreSQL database, and a web-based front-end to that database (Apache web server with Ruby on Rails). Running BETY as a “Dockerized” application therefore involves two containers – one for the PostgreSQL database, and one for the web server. We could build these containers ourselves by starting from a container with nothing but the essentials of a particular operating system, but we can save some time and effort by starting with an existing image for PostgreSQL from Docker Hub. When starting a Dockerized BETY, we start the PostgreSQL container first, then start the BETY container telling it how to communicate with the PostgreSQL container. To upgrade an existing BETY instance, we stop the BETY container, download the latest version, tell it to upgrade the database, and re-start the BETY container. There is no need to install new dependencies for BETY since they are all shipped as part of the container. The PEcAn Docker architecture is designed to facilitate installation and maintenance on a variety of systems by eliminating the need to install and maintain complex system dependencies (such as PostgreSQL, Apache web server, and Shiny server). Furthermore, using separate Docker containers for each ecosystem model helps avoid clashes between different software version requirements of different models (e.g. some models require GCC <5.0, while others may require GCC >=5.0). The full PEcAn Docker stack is described in more detail in the next section. 25.1.2 Working with Docker To run an image, you can use the Docker command line interface. For example, the following runs a PostgreSQL image based on the pre-existing PostGIS image by mdillon: docker run \\ --detach \\ --rm \\ --name postgresql \\ --network pecan \\ --publish 9876:5432 \\ --volume ${PWD}/postgres:/var/lib/postgresql/data \\ mdillon/postgis:9.6-alpine This will start the PostgreSQL+PostGIS container. The following options were used: --detach makes the container run in the background. --rm removes the container when it is finished (make sure to use the volume below). --name the name of the container, also the hostname of the container which can be used by other docker containers in the same network inside docker. --network pecan the network that the container should be running in, this leverages of network isolation in docker and allows this container to be connected to by others using the postgresql hostname. --publish exposes the port to the outside world, this is like ssh, and maps port 9876 to port 5432 in the docker container --volume maps a folder on your local machine to the machine in the container. This allows you to save data on your local machine. mdillon/postgis:9.6-alpine is the actual image that will be run, in this case it comes from the group/person mdillon, the container is postgis and the version 9.6-alpine (version 9.6 build on alpine linux). Other options that might be used: --tty allocate a pseudo-TTY to send stdout and stderr back to the console. --interactive keeps stdin open so the user can interact with the application running. --env sets environment variables, these are often used to change the behavior of the docker container. To see a list of all running containers you can use the following command: docker ps To see the log files of this container you use the following command (you can either use their name or id as returned by docker ps). The -f flag will follow the stdout/stderr from the container, use Ctrl-C to stop following the stdout/stderr. docker logs -f postgresql To stop a running container use: docker stop postgresql Containers that are running in the foreground (without the --detach) can be stopped by pressing Ctrl-C. Any containers running in the background (with --detach) will continue running until the machine is restarted or the container is stopped using docker stop. 25.1.3 docker-compose For a quick introduction to docker-compose, we recommend the following YouTube video: Docker Compose in 12 Minutes. The complete docker-compose references can be found on the Docker documentation website. docker-compose provides a convenient way to configure and run a multi-container Docker stack. Basically, a docker-compose setup consists of a list of containers and their configuration parameters, which are then internally converted into a bunch of docker commands. To configure BETY as described above, we can use a docker-compose.yml file like the following: version: "3" services: postgres: image: mdillon/postgis:9.5 bety: image: pecan/bety depends_on: - postgres This simple file allows us to bring up a full BETY application with both database and BETY application. The BETY app will not be brought up until the database container has started. You can now start this application by changing into the same directory as the docker-compose.yml file (cd /path/to/file) and then running: docker-compose up This will start the application, and you will see the log files for the 2 different containers. "],["docker-quickstart.html", "25.2 Quick-start docker install", " 25.2 Quick-start docker install git clone git@github.com/pecanproject/pecan cd pecan # start database docker-compose -p pecan up -d postgres # add example data (first time only) docker-compose run --rm bety initialize docker run -ti --rm --network pecan_pecan --volume pecan_pecan:/data --env FQDN=docker pecan/data:develop # start PEcAn docker-compose -p pecan up -d # run a model curl -v -X POST \\ -F 'hostname=docker' \\ -F 'modelid=1000000014' \\ -F 'sitegroupid=1' \\ -F 'siteid=772' \\ -F 'sitename=Niwot Ridge Forest/LTER NWT1 (US-NR1)' \\ -F 'pft[]=temperate.coniferous' \\ -F 'start=2004/01/01' \\ -F 'end=2004/12/31' \\ -F 'input_met=5000000005' \\ -F 'email=' \\ -F 'notes=' \\ 'http://pecan.localhost/pecan/04-runpecan.php' "],["the-pecan-docker-install-process-in-detail.html", "25.3 The PEcAn docker install process in detail", " 25.3 The PEcAn docker install process in detail 25.3.1 Configure docker-compose This section will let you download some configuration files. The documentation provides links to the latest released version (master branch in GitHub) or the develop version that we are working on (develop branch in GitHub) which will become the next release. If you cloned the PEcAn GitHub repository you can use git checkout <branch> to switch branches. The PEcAn Docker stack is configured using a docker-compose.yml file. You can download just this file directly from GitHub latest or develop. You can also find this file in the root of cloned PEcAn GitHub repository. There is no need to edit the docker-compose.yml file. You can use either the .env file to change some of the settings, or the docker-compose.override.yml file to modify the docker-compose.yml file. This makes it easier for you to get an updated version of the docker-compose.yml file and not lose any changes you have made to it. Some of the settings in the docker-compose.yml can be set using a .env file. You can download either the latest or the develop version. If you have cloned the GitHub repository it is also located in the docker folder. This file should be called .env and be placed in the same folder as your docker-compose.yml file. This file will allow you to set which version of PEcAn or BETY to use. See the comments in this file to control the settings. Option you might want to set are: PECAN_VERSION : The docker images to use for PEcAn. The default is latest which is the latest released version of PEcAn. Setting this to develop will result in using the version of PEcAn which will become the next release. PECAN_FQDN : Is the name of the server where PEcAn is running. This is what is used to register all files generated by this version of PEcAn (see also TRAEFIK_HOST). PECAN_NAME : A short name of this PEcAn server that is shown in the pull down menu and might be easier to recognize. BETY_VERSION : This controls the version of BETY. The default is latest which is the latest released version of BETY. Setting this to develop will result in using the version of BETY which will become the next release. TRAEFIK_HOST : Should be the FQDN of the server, this is needed when generating a SSL certificate. For SSL certificates you will need to set TRAEFIK_ACME_ENABLE as well as TRAEFIK_ACME_EMAIL. TRAEFIK_IPFILTER : is used to limit access to certain resources, such as RabbitMQ and the Traefik dashboard. A final file, which is optional, is a docker-compose.override.yml. You can download a version for the latest and develop versions. If you have cloned the GitHub repository it is located in the docker folder. Use this file as an example of what you can do, only copy the pieces over that you really need. This will allow you to make changes to the docker-compose file for your local installation. You can use this to add additional containers to your stack, change the path where docker stores the data for the containers, or you can use this to open up the postgresql port. version: "3" services: # expose database to localhost for ease of access postgres: ports: - 5432:5432 Once you have the docker-compose.yml file as well as the optional .env and docker-compose.override.yml in a folder you can start the PEcAn stack. The following instructions assume you are in the same directory as the file (if not, cd into it). In the rest of this section we will use a few arguments for the docker-compose application. The location of these arguments are important. The general syntax of docker-compose is docker-compose <ARGUMENTS FOR DOCKER COMPOSE> <COMMAND> <ARGUMENTS FOR COMMAND> [SERVICES]. More generally, docker-compose options are very sensitive to their location relative to other commands in the same line – that is, docker-compose -f /my/docker-compose.yml -p pecan up -d postgres is not the same as docker-compose -d postgres -p pecan up -f /my/docker-compose.yml. If expected ever don’t seem to be working, check that the arguments are in the right order.) -f <filename> : ARGUMENTS FOR DOCKER COMPOSE : Allows you to specify a docker-compose.yml file explicitly. You can use this argument multiple times. Default is to use the docker-compose.yml and docker-compose.override.yml in your current folder. -p <projectname> : ARGUMENTS FOR DOCKER COMPOSE : Project name, all volumes, networks, and containers will be prefixed with this argument. The default value is to use the current folder name. -d : ARGUMENTS FOR up COMMAND : Will start all the containers in the background and return back to the command shell. If no services as added to the docker-compose command all services possible will be started. 25.3.2 Initialize PEcAn (first time only) Before you can start to use PEcAn for the first time you will need to initialize the database (and optionally add some data). The following two sections will first initialize the database and secondly add some data to the system. 25.3.2.1 Initialize the PEcAn database The commands described in this section will set up the PEcAn database (BETY) and pre-load it with some common “default” data. docker-compose -p pecan up -d postgres # If you have a custom docker-compose file: # docker-compose -f /path/to/my-docker-compose.yml -p pecan up -d postgres The breakdown of this command is as follows: -p pecan – This tells docker-compose to do all of this as part of a “project” -p we’ll call pecan. By default, the project name is set to the name of the current working directory. The project name will be used as a prefix to all containers started by this docker-compose instance (so, if we have a service called postgres, this will create a container called pecan_postgres). up -d – up is a command that initializes the containers. Initialization involves downloading and building the target containers and any containers they depend on, and then running them. Normally, this happens in the foreground, printing logs directly to stderr/stdout (meaning you would have to interrupt it with Ctrl-C), but the -d flag forces this to happen more quietly and in the background. postgres – This indicates that we only want to initialize the service called postgres (and its dependencies). If we omitted this, docker-compose would initialize all containers in the stack. The end result of this command is to initialize a “blank” PostGIS container that will run in the background. This container is not connected to any data (yet), and is basically analogous to just installing and starting PostgreSQL to your system. As a side effect, the above command will also create blank data “volumes” and a “network” that containers will use to communicate with each other. Because our project is called pecan and docker-compose.yml describes a network called pecan, the resulting network is called pecan_pecan. This is relevant to the following commands, which will actually initialize and populate the BETY database. Assuming the above has run successfully, next run the following: docker run --rm --network pecan_pecan pecan/db The breakdown of this command is as follows: {#docker-run-init} docker run – This says we will be running a container. --rm – This automatically removes the resulting container once the specified command exits, as well as any volumes associated with the container. This is useful as a general “clean-up” flag for one-off commands (like this one) to make sure you don’t leave any “zombie” containers or volumes around at the end. --network pecan_pecan – Thsi will start the container in the same network space as the posgres container, allowing it to push data into the database. pecan/db – This is the name of the container, this holds a copy of the database used to initialize the postgresql database. Note that this command may throw a bunch of errors related to functions and/or operators already existing. This is normal – it just means that the PostGIS extension to PostgreSQL is already installed. The important thing is that you see output near the end like: ---------------------------------------------------------------------- Safety checks ---------------------------------------------------------------------- ---------------------------------------------------------------------- Making sure user 'bety' exists. If you do not see this output, you can look at the troubleshooting section at the end of this section for some troubleshooting tips, as well as some solutions to common problems. Once the command has finished successfully, proceed with the next step which will load some initial data into the database and place the data in the docker volumes. 25.3.2.2 Add first user to PEcAn database You can add an initial user to the BETY database, for example the following commands will add the guestuser account as well as the demo carya account: # guest user docker-compose run --rm bety user guestuser guestuser "Guest User" guestuser@example.com 4 4 # example user docker-compose run --rm bety user carya illinois "Carya Demo User" carya@example.com 1 1 25.3.2.3 Add example data (first time only) The following command will add some initial data to the PEcAn stack and register the data with the database. docker run -ti --rm --network pecan_pecan --volume pecan_pecan:/data --env FQDN=docker pecan/data:develop The breakdown of this command is as follows: docker run – This says we will be running a specific command inside the target Docker container. See docker run --help and the Docker run reference for more information. -ti – This is actually two flags, -t to allocate a pseudo-tty and -i to keep STDIN open even if detached. -t is necessary to ensure lower-level script commands run correctly. -i makes sure that the command output (stdin) is displayed. --rm – This automatically removes the resulting container once the specified command exits, as well as any volumes associated with the container. This is useful as a general “clean-up” flag for one-off commands (like this one) to make sure you don’t leave any “zombie” containers or volumes around at the end. --network pecan_pecan – This indicates that the container will use the existing pecan_pecan network. This network is what ensures communication between the postgres container (which, recall, is just a PostGIS installation with some data) and the “volumes” where the actual data are persistently stored. pecan/data:develop – This is the name of the image in which to run the specified command, in the form repository/image:version. This is interpreted as follows: First, it sees if there are any images called pecan/data:develop available on your local machine. If there are, it uses that one. If that image version is not available locally, it will next try to find the image online. By default, it searches Docker Hub, such that pecan/data gets expanded to the container at https://hub.docker.com/r/pecan/data. For custom repositories, a full name can be given, such as hub.ncsa.illinois.edu/pecan/data:latest. If :version is omitted, Docker assumes :latest. NOTE that while online containers should have a :latest version, not all of them do, and if a :latest version does not exist, Docker will be unable to find the image and will throw an error. Everything after the image name (here, pecan/data:develop) is interpreted as an argument to the image’s specified entrypoint. --volume pecan_pecan:/data – This mounts the data from the subsequent container (pecan/data:develop) onto the current project volume, called pecan_pecan (as with the network, the project name pecan is the prefix, and the volume name also happens to be pecan as specified in the docker-compose.yml file). --env FQDN=docker – the Fully Qualified Domain Name, this is the same value as specified in the .env file (for the web, monitor and executor containers). This will link the data files to the name in the machines table in BETY. pecan/data:develop – As above, this is the target image to run. Since there is no argument after the image name, this command will run the default command (CMD) specified for this docker container. In this case, it is the docker/add_data.sh script from the PEcAn repository. Under the hood, this container runs the docker/add-data.sh script, which copies a bunch of input files and registers them with the PEcAn database. Successful execution of this command should take some time because it involves copying reasonably large amounts of data and performing a number of database operations. 25.3.2.4 Start PEcAn If you already completed the above steps, you can start the full stack by just running the following: docker-compose -p pecan up -d This will build and start all containers required to run PEcAn. With the -d flag, this will run all of these containers quietly in the background, and show a nice architecture diagram with the name and status of each container while they are starting. Once this is done you have a working instance of PEcAn. If all of the containers started successfully, you should be able to access the various components from a browser via the following URLs (if you run these commands on a remote machine replace localhost with the actual hostname). PEcAn web interface (running models) – http://pecan.localhost/pecan/ (NOTE: The trailing backslash is necessary.) PEcAn documentation and home page – http://pecan.localhost/ BETY web interface – http://pecan.localhost/bety/ Monitor, service that monitors models and shows all models that are online as well as how many instances are online and the number of jobs waiting. The output is in JSON – http://pecan.localhost/monitor/ 25.3.2.5 Start model runs using curl To test PEcAn you can use the following curl statement, or use the webpage to submit a request (if you run these commands on a remote machine replace localhost with the actual hostname): curl -v -X POST \\ -F 'hostname=docker' \\ -F 'modelid=5000000002' \\ -F 'sitegroupid=1' \\ -F 'siteid=772' \\ -F 'sitename=Niwot Ridge Forest/LTER NWT1 (US-NR1)' \\ -F 'pft[]=temperate.coniferous' \\ -F 'start=2004/01/01' \\ -F 'end=2004/12/31' \\ -F 'input_met=5000000005' \\ -F 'email=' \\ -F 'notes=' \\ 'http://pecan.localhost/pecan/04-runpecan.php' This should return some text with in there Location: this is shows the workflow id, you can prepend http://pecan.localhost/pecan/ to the front of this, for example: http://pecan.localhost/pecan/05-running.php?workflowid=99000000001. Here you will be able to see the progress of the workflow. To see what is happening behind the scenes you can use look at the log file of the specific docker containers, once of interest are pecan_executor_1 this is the container that will execute a single workflow and pecan_sipnet_1 which executes the sipnet mode. To see the logs you use docker logs pecan_executor_1 Following is an example output: 2018-06-13 15:50:37,903 [MainThread ] INFO : pika.adapters.base_connection - Connecting to 172.18.0.2:5672 2018-06-13 15:50:37,924 [MainThread ] INFO : pika.adapters.blocking_connection - Created channel=1 2018-06-13 15:50:37,941 [MainThread ] INFO : root - [*] Waiting for messages. To exit press CTRL+C 2018-06-13 19:44:49,523 [MainThread ] INFO : root - b'{"folder": "/data/workflows/PEcAn_99000000001", "workflowid": "99000000001"}' 2018-06-13 19:44:49,524 [MainThread ] INFO : root - Starting job in /data/workflows/PEcAn_99000000001. 2018-06-13 19:45:15,555 [MainThread ] INFO : root - Finished running job. This shows that the executor connects to RabbitMQ, waits for messages. Once it picks up a message it will print the message, and execute the workflow in the folder passed in with the message. Once the workflow (including any model executions) is finished it will print Finished. The log file for pecan_sipnet_1 is very similar, in this case it runs the job.sh in the run folder. To run multiple executors in parallel you can duplicate the executor section in the docker-compose file and just rename it from executor to executor1 and executor2 for example. The same can be done for the models. To make this easier it helps to deploy the containers using Kubernetes allowing to easily scale up and down the containers. 25.3.3 Troubleshooting When initializing the database, you will know you have encountered more serious errors if the command exits or hangs with output resembling the following: LINE 1: SELECT count(*) FROM formats WHERE ... ^ Error: Relation `formats` does not exist If the above command fails, you can try to fix things interactively by first opening a shell inside the container… docker run -ti --rm --network pecan_pecan pecan/bety:latest /bin/bash …and then running the following commands, which emulate the functionality of the entrypoint.sh with the initialize argument. # Create the bety role in the postgresql database psql -h postgres -p 5432 -U postgres -c "CREATE ROLE bety WITH LOGIN CREATEDB NOSUPERUSER NOCREATEROLE PASSWORD 'bety'" # Initialize the bety database itself, and set to be owned by role bety psql -h postgres -p 5432 -U postgres -c "CREATE DATABASE bety WITH OWNER bety" # If either of these fail with a "role/database bety already exists", # that's fine. You can safely proceed to the next command. # Load the actual bety database tables and values ./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c -u -g -m ${LOCAL_SERVER} -r 0 -w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz "],["pecan-docker.html", "25.4 PEcAn Docker Architecture", " 25.4 PEcAn Docker Architecture Overview PEcAn’s docker-compose Top-level structure traefik portainer minio thredds postgres rabbitmq bety docs web executor monitor Model-specific containers 25.4.1 Overview The PEcAn docker architecture consists of many containers (see figure below) that will communicate with each other. The goal of this architecture is to easily expand the PEcAn system by deploying new model containers and registering them with PEcAn. Once this is done the user can now use these new models in their work. The PEcAn framework will setup the configurations for the models, and send a message to the model containers to start execution. Once the execution is finished the PEcAn framework will continue. This is exactly as if the model is running on a HPC machine. Models can be executed in parallel by launching multiple model containers. As can be seen in the figure the architecture leverages of two standard containers (in orange). The first container is postgresql with postgis (mdillon/postgis) which is used to store the database used by both BETY and PEcAn. The second containers is a messagebus, more specifically RabbitMQ (rabbitmq). The BETY app container (pecan/bety) is the front end to the BETY database and is connected to the postgresql container. A http server can be put in front of this container for SSL termination as well to allow for load balancing (by using multiple BETY app containers). The PEcAn framework containers consist of multiple unique ways to interact with the PEcAn system (none of these containers will have any models installed): PEcAn shiny hosts the shiny applications developed and will interact with the database to get all information necessary to display PEcAn rstudio is a rstudio environment with the PEcAn libraries preloaded. This allows for prototyping of new algorithms that can be used as part of the PEcAn framework later. PEcAn web allows the user to create a new PEcAn workflow. The workflow is stored in the database, and the models are executed by the model containers. PEcAn cli will allow the user to give a pecan.xml file that will be executed by the PEcAn framework. The workflow created from the XML file is stored in the database, and the models are executed by the model containers. The model containers contain the actual models that are executed as well as small wrappers to make them work in the PEcAn framework. The containers will run the model based on the parameters received from the message bus and convert the outputs back to the standard PEcAn output format. Once the container is finished processing a message it will immediatly get the next message and start processing it. 25.4.2 PEcAn’s docker-compose The PEcAn Docker architecture is described in full by the PEcAn docker-compose.yml file. For full docker-compose syntax, see the official documentation. This section describes the top-level structure and each of the services, which are as follows: traefik portainer minio thredds postgres rabbitmq bety docs web executor monitor Model-specific services For reference, the complete docker-compose file is as follows: version: '3.2' services: traefik: image: traefik:v2.9 command: - --log.level=INFO - --api=true - --api.dashboard=true - --entrypoints.web.address=:80 - --providers.docker=true - --providers.docker.endpoint=unix:///var/run/docker.sock - --providers.docker.exposedbydefault=false - --providers.docker.watch=true restart: unless-stopped networks: pecan security_opt: no-new-privileges:true ports: ${TRAEFIK_HTTP_PORT-80}:80 volumes: - traefik:/config - /var/run/docker.sock:/var/run/docker.sock:ro labels: - traefik.enable=true - traefik.http.routers.traefik.entrypoints=web - traefik.http.routers.traefik.rule=Host(`traefik.pecan.localhost`) - traefik.http.routers.traefik.service=api@internal rabbitmq: image: rabbitmq:3.8-management restart: unless-stopped networks: pecan environment: - RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER:-guest} - RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS:-guest} labels: - traefik.enable=true - traefik.http.services.rabbitmq.loadbalancer.server.port=15672 - traefik.http.routers.rabbitmq.entrypoints=web - traefik.http.routers.rabbitmq.rule=Host(`rabbitmq.pecan.localhost`) volumes: rabbitmq:/var/lib/rabbitmq postgres: image: mdillon/postgis:9.5 restart: unless-stopped networks: pecan volumes: postgres:/var/lib/postgresql/data bety: image: pecan/bety:${BETY_VERSION:-latest} restart: unless-stopped networks: pecan environment: - UNICORN_WORKER_PROCESSES=1 - SECRET_KEY_BASE=${BETY_SECRET_KEY:-notasecret} - RAILS_RELATIVE_URL_ROOT=/bety - LOCAL_SERVER=${BETY_LOCAL_SERVER:-99} depends_on: postgres labels: - traefik.enable=true - traefik.http.services.bety.loadbalancer.server.port=8000 - traefik.http.routers.bety.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/bety/`) rstudio: image: pecan/base:${PECAN_VERSION:-latest} command: /work/rstudio.sh restart: unless-stopped networks: pecan depends_on: - rabbitmq - postgres environment: - KEEP_ENV=RABBITMQ_URI RABBITMQ_PREFIX RABBITMQ_PORT FQDN NAME - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - RABBITMQ_PREFIX=/ - RABBITMQ_PORT=15672 - FQDN=${PECAN_FQDN:-docker} - NAME=${PECAN_NAME:-docker} - USER=${PECAN_RSTUDIO_USER:-carya} - PASSWORD=${PECAN_RSTUDIO_PASS:-illinois} - USERID=${UID:-1001} - GROUPID=${GID:-1001} volumes: - pecan:/data - rstudio:/home labels: - traefik.enable=true - traefik.http.routers.rstudio.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/rstudio/`) - traefik.http.routers.rstudio.service=rstudio - traefik.http.routers.rstudio.middlewares=rstudio-stripprefix,rstudio-headers - traefik.http.services.rstudio.loadbalancer.server.port=8787 - traefik.http.middlewares.rstudio-headers.headers.customrequestheaders.X-RStudio-Root-Path=/rstudio - traefik.http.middlewares.rstudio-stripprefix.stripprefix.prefixes=/rstudio - traefik.http.routers.rstudio-local.entrypoints=web - traefik.http.routers.rstudio-local.rule=Host(`rstudio.pecan.localhost`) - traefik.http.routers.rstudio-local.service=rstudio-local - traefik.http.services.rstudio-local.loadbalancer.server.port=8787 docs: image: pecan/docs:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan labels: - traefik.enable=true - traefik.http.services.docs.loadbalancer.server.port=80 - traefik.http.routers.docs.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/`) pecan: user: ${UID:-1001}:${GID:-1001} image: pecan/web:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - FQDN=${PECAN_FQDN:-docker} - NAME=${PECAN_NAME:-docker} - SECRET_KEY_BASE=${BETY_SECRET_KEY:-thisisnotasecret} depends_on: - postgres - rabbitmq labels: - traefik.enable=true - traefik.http.services.pecan.loadbalancer.server.port=8080 - traefik.http.routers.pecan.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/pecan/`) volumes: - pecan:/data - pecan:/var/www/html/pecan/data monitor: user: ${UID:-1001}:${GID:-1001} image: pecan/monitor:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - FQDN=${PECAN_FQDN:-docker} depends_on: rabbitmq labels: - traefik.enable=true - traefik.http.routers.monitor.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/monitor/`) - traefik.http.routers.monitor.middlewares=monitor-stripprefix - traefik.http.middlewares.monitor-stripprefix.stripprefix.prefixes=/monitor volumes: pecan:/data executor: user: ${UID:-1001}:${GID:-1001} image: pecan/executor:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - RABBITMQ_PREFIX=/ - RABBITMQ_PORT=15672 - FQDN=${PECAN_FQDN:-docker} depends_on: - postgres - rabbitmq volumes: pecan:/data basgra: user: ${UID:-1001}:${GID:-1001} image: pecan/model-basgra-basgra_n_v1.0:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data sipnet: user: ${UID:-1001}:${GID:-1001} image: pecan/model-sipnet-r136:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data ed2: user: ${UID:-1001}:${GID:-1001} image: pecan/model-ed2-2.2.0:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data maespa: user: ${UID:-1001}:${GID:-1001} image: pecan/model-maespa-git:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data biocro: user: ${UID:-1001}:${GID:-1001} image: pecan/model-biocro-0.95:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data dbsync: image: pecan/shiny-dbsync:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan depends_on: postgres labels: - traefik.enable=true - traefik.http.routers.dbsync.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/dbsync/`) - traefik.http.routers.dbsync.middlewares=dbsync-stripprefix - traefik.http.middlewares.dbsync-stripprefix.stripprefix.prefixes=/monitor api: image: pecan/api:${PECAN_VERSION:-latest} user: ${UID:-1001}:${GID:-1001} networks: pecan environment: - PGHOST=${PGHOST:-postgres} - HOST_ONLY=${HOST_ONLY:-FALSE} - AUTH_REQ=${AUTH_REQ:-FALSE} - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - DATA_DIR=${DATA_DIR:-/data/} - DBFILES_DIR=${DBFILES_DIR:-/data/dbfiles/} - SECRET_KEY_BASE=${BETY_SECRET_KEY:-thisisnotasecret} labels: - traefik.enable=true - traefik.http.routers.api.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/api/`) - traefik.http.services.api.loadbalancer.server.port=8000 depends_on: postgres volumes: pecan:/data/ networks: pecan: ~ volumes: traefik: ~ postgres: ~ rabbitmq: ~ pecan: ~ rstudio: ~ There are two ways you can override different values in the docker-compose.yml file. The first method is to create a file called .env that is placed in the same folder as the docker-compose.yml file. This file can override some of configuration variables used by docker-compose. For example the following is an example of the env file # This file will override the configuration options in the docker-compose # file. Copy this file to the same folder as docker-compose as .env # ---------------------------------------------------------------------- # GENERAL CONFIGURATION # ---------------------------------------------------------------------- # project name (-p flag for docker-compose) #COMPOSE_PROJECT_NAME=pecan # ---------------------------------------------------------------------- # TRAEFIK CONFIGURATION # ---------------------------------------------------------------------- # hostname of server #TRAEFIK_HOST=pecan-docker.ncsa.illinois.edu # Run traffik on port 80 (http) and port 443 (https) #TRAEFIK_HTTP_PORT=80 #TRAEFIK_HTTPS_PORT=443 # Use you real email address here to be notified if cert expires #TRAEFIK_ACME_EMAIL=pecanproj@gmail.com # ---------------------------------------------------------------------- # PEcAn CONFIGURATION # ---------------------------------------------------------------------- # what version of pecan to use #PECAN_VERSION=develop # the fully qualified hostname used for this server #PECAN_FQDN=pecan-docker.ncsa.illinois.edu # short name shown in the menu #PECAN_NAME=pecan-docker # ---------------------------------------------------------------------- # BETY CONFIGURATION # ---------------------------------------------------------------------- # what version of BETY to use #BETY_VERSION=develop # what is our server number, 99=vm, 98=docker #BETY_LOCAL_SERVER=98 # secret used to encrypt cookies in BETY #BETY_SECRET_KEY=1208q7493e8wfhdsohfo9ewhrfiouaho908ruq30oiewfdjspadosuf08q345uwrasdy98t7q243 # ---------------------------------------------------------------------- # MINIO CONFIGURATION # ---------------------------------------------------------------------- # minio username and password #MINIO_ACCESS_KEY=carya #MINIO_SECRET_KEY=illinois # ---------------------------------------------------------------------- # PORTAINER CONFIGURATION # ---------------------------------------------------------------------- # password for portainer admin account # use docker run --rm httpd:2.4-alpine htpasswd -nbB admin <password> | cut -d ":" -f 2 #PORTAINER_PASSWORD=$2y$05$5meDPBtS3NNxyGhBpYceVOxmFhiiC3uY5KEy2m0YRbWghhBr2EVn2 # ---------------------------------------------------------------------- # RABBITMQ CONFIGURATION # ---------------------------------------------------------------------- # RabbitMQ username and password #RABBITMQ_DEFAULT_USER=carya #RABBITMQ_DEFAULT_PASS=illinois # create the correct URI with above username and password #RABBITMQ_URI=amqp://carya:illinois@rabbitmq/%2F # ---------------------------------------------------------------------- # RSTUDIO CONFIGURATION # ---------------------------------------------------------------------- # Default RStudio username and password for startup of container #PECAN_RSTUDIO_USER=carya #PECAN_RSTUDIO_PASS=illinois You can also extend the docker-compose.yml file with a docker-compose.override.yml file (in the same directory), allowing you to add more services, or for example to change where the volumes are stored (see official documentation). For example the following will change the volume for postgres to be stored in your home directory: version: "3" volumes: postgres: driver_opts: type: none device: ${HOME}/postgres o: bind 25.4.3 Top-level structure The root of the docker-compose.yml file contains three sections: services – This is a list of services provided by the application, with each service corresponding to a container. When communicating with each other internally, the hostnames of containers correspond to their names in this section. For instance, regardless of the “project” name passed to docker-compose up, the hostname for connecting to the PostgreSQL database of any given container is always going to be postgres (e.g. you should be able to access the PostgreSQL database by calling the following from inside the container: psql -d bety -U bety -h postgres). The services comprising the PEcAn application are described below. networks – This is a list of networks used by the application. Containers can only communicate with each other (via ports and hostnames) if they are on the same Docker network, and containers on different networks can only communicate through ports exposed by the host machine. We just provide the network name (pecan) and resort to Docker’s default network configuration. Note that the services we want connected to this network include a networks: ... - pecan tag. For more details on Docker networks, see the official documentation. volumes – Similarly to networks, this just contains a list of volume names we want. Briefly, in Docker, volumes are directories containing files that are meant to be shared across containers. Each volume corresponds to a directory, which can be mounted at a specific location by different containers. For example, syntax like volumes: ... - pecan:/data in a service definition means to mount the pecan “volume” (including its contents) in the /data directory of that container. Volumes also allow data to persist on containers between restarts, as normally, any data created by a container during its execution is lost when the container is re-launched. For example, using a volume for the database allows data to be saved between different runs of the database container. Without volumes, we would start with a blank database every time we restart the containers. For more details on Docker volumes, see the official documentation. Here, we define three volumes: postgres – This contains the data files underlying the PEcAn PostgreSQL database (BETY). Notice that it is mounted by the postgres container to /var/lib/postgresql/data. This is the data that we pre-populate when we run the Docker commands to initialize the PEcAn database. Note that these are the values stored directly in the PostgreSQL database. The default files to which the database points (i.e. dbfiles) are stored in the pecan volume, described below. rabbitmq – This volume contains persistent data for RabbitMQ. It is only used by the rabbitmq service. pecan – This volume contains PEcAn’s dbfiles, which include downloaded and converted model inputs, processed configuration files, and outputs. It is used by almost all of the services in the PEcAn stack, and is typically mounted to /data. 25.4.4 traefik Traefik manages communication among the different PEcAn services and between PEcAn and the web. Among other things, traefik facilitates the setup of web access to each PEcAn service via common and easy-to-remember URLs. For instance, the following lines in the web service configure access to the PEcAn web interface via the URL http://pecan.localhost/pecan/ : ~ (Further details in the works…) The traefik service configuration looks like this: traefik: image: traefik:v2.9 command: - --log.level=INFO - --api=true - --api.dashboard=true - --entrypoints.web.address=:80 - --providers.docker=true - --providers.docker.endpoint=unix:///var/run/docker.sock - --providers.docker.exposedbydefault=false - --providers.docker.watch=true restart: unless-stopped networks: pecan security_opt: no-new-privileges:true ports: ${TRAEFIK_HTTP_PORT-80}:80 volumes: - traefik:/config - /var/run/docker.sock:/var/run/docker.sock:ro labels: - traefik.enable=true - traefik.http.routers.traefik.entrypoints=web - traefik.http.routers.traefik.rule=Host(`traefik.pecan.localhost`) - traefik.http.routers.traefik.service=api@internal 25.4.5 portainer portainer is lightweight management UI that allows you to manage the docker host (or swarm). You can use this service to monitor the different containers, see the logfiles, and start and stop containers. The portainer service configuration looks like this: NA: ~ Portainer is accessible by browsing to pecan.localhost/portainer/. You can either set the password in the .env file (for an example see env.example) or you can use the web browser and go to the portainer url. If this is the first time it will ask for your password. 25.4.6 minio Minio is a service that provides access to the a folder on disk through a variety of protocols, including S3 buckets and web-based access. We mainly use Minio to facilitate access to PEcAn data using a web browser without the need for CLI tools. Our current configuration is as follows: NA: ~ The Minio interface is accessible by browsing to minio.pecan.localhost. From there, you can browse directories and download files. You can also upload files by clicking the red “+” in the bottom-right corner. Note that it is currently impossible to create or upload directories using the Minio interface (except in the /data root directory – those folders are called “buckets” in Minio). Therefore, the recommended way to perform any file management tasks other than individual file uploads is through the command line, e.g. docker run -it --rm --volumes pecan_pecan:/data --volumes /path/to/local/directory:/localdir ubuntu # Now, you can move files between `/data` and `/localdir`, create new directories, etc. 25.4.7 thredds This service allows PEcAn model outputs to be accessible via the THREDDS data server (TDS). When the PEcAn stack is running, the catalog can be explored in a web browser at http://pecan.localhost/thredds/catalog.html. Specific output files can also be accessed from the command line via commands like the following: Note that everything after outputs/ exactly matches the directory structure of the workflows directory. Which files are served, which subsetting services are available, and other aspects of the data server’s behavior are configured in the docker/thredds_catalog.xml file. Specifically, this XML tells the data server to use the datasetScan tool to serve all files within the /data/workflows directory, with the additional filter that only files ending in .nc are served. For additional information about the syntax of this file, see the extensive THREDDS documentation. Our current configuration is as follows: NA: ~ 25.4.8 postgres This service provides a working PostGIS database. Our configuration is fairly straightforward: postgres: image: mdillon/postgis:9.5 restart: unless-stopped networks: pecan volumes: postgres:/var/lib/postgresql/data Some additional details about our configuration: image – This pulls a container with PostgreSQL + PostGIS pre-installed. Note that by default, we use PostgreSQL version 9.5. To experiment with other versions, you can change 9.5 accordingly. networks – This allows PostgreSQL to communicate with other containers on the pecan network. As mentioned above, the hostname of this service is just its name, i.e. postgres, so to connect to the database from inside a running container, use a command like the following: psql -d bety -U bety -h postgres volumes – Note that the PostgreSQL data files (which store the values in the SQL database) are stored on a volume called postgres (which is not the same as the postgres service, even though they share the same name). 25.4.9 rabbitmq RabbitMQ is a message broker service. In PEcAn, RabbitMQ functions as a task manager and scheduler, coordinating the execution of different tasks (such as running models and analyzing results) associated with the PEcAn workflow. Our configuration is as follows: rabbitmq: image: rabbitmq:3.8-management restart: unless-stopped networks: pecan environment: - RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER:-guest} - RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS:-guest} labels: - traefik.enable=true - traefik.http.services.rabbitmq.loadbalancer.server.port=15672 - traefik.http.routers.rabbitmq.entrypoints=web - traefik.http.routers.rabbitmq.rule=Host(`rabbitmq.pecan.localhost`) volumes: rabbitmq:/var/lib/rabbitmq Note that the traefik.http.routers.rabbitmq.rule indicates that browsing to http://rabbitmq.pecan.localhost/ leads to the RabbitMQ management console. By default, the RabbitMQ management console has username/password guest/guest, which is highly insecure. For production instances of PEcAn, we highly recommend changing these credentials to something more secure, and removing access to the RabbitMQ management console via Traefik. 25.4.10 bety This service operates the BETY web interface, which is effectively a web-based front-end to the PostgreSQL database. Unlike the postgres service, which contains all the data needed to run PEcAn models, this service is not essential to the PEcAn workflow. However, note that certain features of the PEcAn web interface do link to the BETY web interface and will not work if this container is not running. Our configuration is as follows: bety: image: pecan/bety:${BETY_VERSION:-latest} restart: unless-stopped networks: pecan environment: - UNICORN_WORKER_PROCESSES=1 - SECRET_KEY_BASE=${BETY_SECRET_KEY:-notasecret} - RAILS_RELATIVE_URL_ROOT=/bety - LOCAL_SERVER=${BETY_LOCAL_SERVER:-99} depends_on: postgres labels: - traefik.enable=true - traefik.http.services.bety.loadbalancer.server.port=8000 - traefik.http.routers.bety.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/bety/`) The BETY container Dockerfile is located in the root directory of the BETY GitHub repository (direct link). 25.4.11 docs This service will show the documentation for the version of PEcAn running as well as a homepage with links to all relevant endpoints. You can access this at http://pecan.localhost/. You can find the documentation for PEcAn at http://pecan.localhost/docs/pecan/. Our current configuration is as follows: docs: image: pecan/docs:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan labels: - traefik.enable=true - traefik.http.services.docs.loadbalancer.server.port=80 - traefik.http.routers.docs.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/`) 25.4.12 web This service runs the PEcAn web interface. It is effectively a thin wrapper around a standard Apache web server container from Docker Hub that installs some additional dependencies and copies over the necessary files from the PEcAn source code. Our configuration is as follows: NA: ~ Its Dockerfile ships with the PEcAn source code, in docker/web/Dockerfile. In terms of actively developing PEcAn using Docker, this is the service to modify when making changes to the web interface (i.e. PHP, HTML, and JavaScript code located in the PEcAn web directory). 25.4.13 executor This service is in charge of running the R code underlying the core PEcAn workflow. However, it is not in charge of executing the models themselves – model binaries are located on their own dedicated Docker containers, and model execution is coordinated by RabbitMQ. Our configuration is as follows: executor: user: ${UID:-1001}:${GID:-1001} image: pecan/executor:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - RABBITMQ_PREFIX=/ - RABBITMQ_PORT=15672 - FQDN=${PECAN_FQDN:-docker} depends_on: - postgres - rabbitmq volumes: pecan:/data Its Dockerfile is ships with the PEcAn source code, in docker/executor/Dockerfile. Its image is built on top of the pecan/base image (docker/base/Dockerfile), which contains the actual PEcAn source. To facilitate caching, the pecan/base image is itself built on top of the pecan/depends image (docker/depends/Dockerfile), a large image that contains an R installation and PEcAn’s many system and R package dependencies (which usually take ~30 minutes or longer to install from scratch). In terms of actively developing PEcAn using Docker, this is the service to modify when making changes to the PEcAn R source code. Note that, unlike changes to the web image’s PHP code, changes to the R source code do not immediately propagate to the PEcAn container; instead, you have to re-compile the code by running make inside the container. 25.4.14 monitor This service will show all models that are currently running http://pecan.localhost/monitor/. This list returned is JSON and shows all models (grouped by type and version) that are currently running, or where seen in the past. This list will also contain a list of all current active containers, as well as how many jobs are waiting to be processed. This service is also responsible for registering any new models with PEcAn so users can select it and execute the model from the web interface. Our current configuration is as follows: monitor: user: ${UID:-1001}:${GID:-1001} image: pecan/monitor:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: - RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} - FQDN=${PECAN_FQDN:-docker} depends_on: rabbitmq labels: - traefik.enable=true - traefik.http.routers.monitor.rule=Host(`${TRAEFIK_HOST:-pecan.localhost}`) && PathPrefix(`/monitor/`) - traefik.http.routers.monitor.middlewares=monitor-stripprefix - traefik.http.middlewares.monitor-stripprefix.stripprefix.prefixes=/monitor volumes: pecan:/data 25.4.15 Model-specific containers Additional models are added as additional services. In general, their configuration should be similar to the following configuration for SIPNET, which ships with PEcAn: sipnet: user: ${UID:-1001}:${GID:-1001} image: pecan/model-sipnet-r136:${PECAN_VERSION:-latest} restart: unless-stopped networks: pecan environment: RABBITMQ_URI=${RABBITMQ_URI:-amqp://guest:guest@rabbitmq/%2F} depends_on: rabbitmq volumes: pecan:/data The PEcAn source contains Dockerfiles for ED2 (models/ed/Dockerfile) and SIPNET (models/sipnet/Dockerfile) that can serve as references. For additional tips on constructing a Dockerfile for your model, see Dockerfiles for Models. "],["model-docker.html", "25.5 Models using Docker", " 25.5 Models using Docker This section will discuss how to add new models to PEcAn docker. To be able to add a new model to PEcAn when using docker is as simple as starting a new container. The model will come online and let the PEcAn framework know there is a new model available, there is no need to go through the process of registering this model with PEcAn. Users will be able to select this new model from web interface and run with this model selected. For this process to work a docker image of the model will need to be created as well as small json file that is used to announce this new model. A separate service in PEcAn (monitor) will use this json file to keep track of all models available as well as register these models with PEcAn. Model information Model build Common problems 25.5.1 Model information Each model will have a small json file called model_info.json that is used to describe the model and used by the monitor service to register the model with PEcAn. This file will contain information about the model that is send as part of the heartbeat of the container to the monitor service. Below is an example of this file for the ED model. The required fields are name, type, version and binary. The fields type and version are used by PEcAn to send the messages to RabbitMQ. There is no need to specify the queue name explicitly. The queue name will be created by combining these two fields with an underscore. The field binary is used to point to the actual binary in the docker container. There are 2 special values that can be used, @VERSION@ which will be replaced by the version that is passed in when building the container, and @BINARY@ which will be replaced by the binary when building the docker image. { "name": "ED2.2", "type": "ED2", "version": "@VERSION@", "binary": "@BINARY@", "description": "The Ecosystem Demography Biosphere Model (ED2) is an integrated terrestrial biosphere model incorporating hydrology, land-surface biophysics, vegetation dynamics, and soil carbon and nitrogen biogeochemistry", "author": "Mike Dietze", "contributors": ["David LeBauer", "Xiaohui Feng", "Dan Wang", "Carl Davidson", "Rob Kooper", "Shawn Serbin", "Alexey Shiklomanov"], "links": { "source": "https://github.com/EDmodel/ED2", "issues": "https://github.com/EDmodel/ED2/issues" }, "inputs": {}, "citation": ["Medvigy D., Wofsy S. C., Munger J. W., Hollinger D. Y., Moorcroft P. R. 2009. Mechanistic scaling of ecosystem function and dynamics in space and time: Ecosystem Demography model version 2. J. Geophys. Res. 114 (doi:10.1029/2008JG000812)"] } Other fields that are recommended, but currently not used yet, are: description : a longer description of the model. creator : contact person about this docker image. contribuor : other people that have contributed to this docker image. links : addtional links to help people when using this docker image, for example values that can be used are source to link to the source code, issues to link to issue tracking system, and documentation to link to model specific documentation. citation : how the model should be cited in publications. 25.5.2 Model build In general we try to minimize the size of the images. To be able to do this we split the process of creating the building of the model images into two pieces (or leverage of an image that exists from the original model developers). If you look at the example Dockerfile you will see that there are 2 sections, the first section will build the model binary, the second section will build the actual PEcAn model, which copies the binary from the first section. This is an example of how the ED2 model is build. This will install all the packages needed to build ED2 model, gets the latest version from GitHub and builds the model. The second section will create the actual model runner. This will leverage the PEcAn model image that has PEcAn already installed as well as the python code to listen for messages and run the actual model code. This will install some additional packages needed by the model binary (more about that below), copy the model_info.json file and change the @VERSION@ and @BINARY@ placeholders. It is important values for type and version are set correct. The PEcAn code will use these to register the model with the BETY database, which is then used by PEcAn to send out a message to a specfic worker queue, if you do not set these variables correctly your model executor will pick up messages for the wrong model. To build the docker image, we use a Dockerfile (see example below) and run the following command. This command will expect the Dockerfile to live in the model specific folder and the command is executed in the root pecan folder. It will copy the content of the pecan folder and make it available to the build process (in this example we do not need any additional files). Since we can have multiple different versions of a model be available for PEcAn we ar using the following naming schema pecan/model-<modeltype>-<version>:<pecan version. For example the image below will be named pecan/model-ed2-git, since we do not specify the exact version it will be atomically be named pecan/model-ed2-git:latest. docker build \\ --tag pecan/model-ed2-git \\ --file models/ed/Dockerfile \\ . Example of a Dockerfile, in this case to build the ED2 model. # ---------------------------------------------------------------------- # BUILD MODEL BINARY # ---------------------------------------------------------------------- FROM debian:stretch as model-binary # Some variables that can be used to set control the docker build ARG MODEL_VERSION=git # install dependencies RUN apt-get update \\ && apt-get install -y --no-install-recommends \\ build-essential \\ curl \\ gfortran \\ git \\ libhdf5-dev \\ libopenmpi-dev \\ && rm -rf /var/lib/apt/lists/* # download, unzip and build ed2 WORKDIR /src RUN git -c http.sslVerify=false clone https://github.com/EDmodel/ED2.git \\ && cd ED2/ED/build \\ && curl -o make/include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.Linux \\ && if [ "${MODEL_VERSION}" != "git" ]; then git checkout ${MODEL_VERSION}; fi \\ && ./install.sh -g -p VM ######################################################################## # ---------------------------------------------------------------------- # BUILD PECAN FOR MODEL # ---------------------------------------------------------------------- FROM pecan/models:latest # ---------------------------------------------------------------------- # INSTALL MODEL SPECIFIC PIECES # ---------------------------------------------------------------------- RUN apt-get update \\ && apt-get install -y --no-install-recommends \\ libgfortran3 \\ libopenmpi2 \\ && rm -rf /var/lib/apt/lists/* # ---------------------------------------------------------------------- # SETUP FOR SPECIFIC MODEL # ---------------------------------------------------------------------- # Some variables that can be used to set control the docker build ARG MODEL_VERSION=git # Setup model_info file COPY models/ed/model_info.json /work/model.json RUN sed -i -e "s/@VERSION@/${MODEL_VERSION}/g" \\ -e "s#@BINARY@#/usr/local/bin/ed2.${MODEL_VERSION}#g" /work/model.json # COPY model binary COPY --from=model-binary /src/ED2/ED/build/ed_2.1-opt /usr/local/bin/ed2.${MODEL_VERSION} WARNING: Dockerfile environment variables set via ENV are assigned all at once; they do not evaluate successively, left to right. Consider the following block: # Don't do this! ENV MODEL_TYPE="SIPNET" \\ MODEL_VERSION=136 \\ MODEL_TYPE_VERSION=${MODEL_TYPE}_${MODEL_VERSION} # <- Doesn't know about MODEL_TYPE or MODEL_VERSION! In this block, the expansion for setting MODEL_TYPE_VERSION is not aware of the current values of MODEL_TYPE or MODEL_VERSION, and will therefore be set incorrectly to just _ (unless they have been set previously, in which case it will be aware only of their earlier values). As such, variables depending on other variables must be set in a separate, subsequent ENV statement than the variables they depend on. Once the model has build and is working we can add it to the PEcAn stack and be able to use this model in the web interface. There are two methods to start this new model. First, we can add it to the docker-compose.yml file and start the container using docker-compose -p pecan -d up. sipnet: image: pecan/model-ed2-git networks: - pecan volumes: - pecan:/data depends_on: - rabbitmq restart: unless-stopped Alternatively we can start the container manually using the following command. docker run \\ --detach \\ --rm \\ --name pecan-ed2-git \\ --networks pecan_pecan \\ --volume pecan_pecan:/data pecan/model-ed2-git 25.5.3 Common problems Following are some solutions for common problems that you might encounter when building the docker images for a model. 25.5.4 Debugging missing libraries When building the model binary it might require specific libraries to be installed. In the second stage the model binary is copied into a new image, which could result in the binary missing specific libraries. In the case of the ED2 model the following was used to find the libraries that are needed to be installed (libgfortran5 and libopenmpi3). The first step is to build the model using the Dockerfile (in this case the ap-get install was missing in the second stage). Step 5/9 : RUN git clone https://github.com/EDmodel/ED2.git && cd ED2/ED/build && curl -o make/include.mk.VM http://isda.ncsa.illinois.edu/~kooper/EBI/include.mk.opt.`uname -s` && if [ "${MODEL_VERSION}" != "git" ]; then git checkout ${MODEL_VERSION}; fi && ./install.sh -g -p VM ... LOTS OF OUTPUT ... make[1]: Leaving directory '/src/ED2/ED/build/bin-opt-E' Installation Complete. Removing intermediate container a53eba9a8fc1 ---> 7f23c6302130 Step 6/9 : FROM pecan/executor:latest ---> f19d81b739f5 ... MORE OUTPUT ... Step 9/9 : COPY --from=model-binary /src/ED2/ED/build/ed_2.1-opt /usr/local/bin/ed2.${MODEL_VERSION} ---> 07ac841be457 Successfully built 07ac841be457 Successfully tagged pecan/pecan-ed2:latest At this point we have created a docker image with the binary and all PEcAn code that is needed to run the model. Some models (especially those build as native code) might be missing additional packages that need to be installed in the docker image. To see if all libraries are installed for the binary. > docker run -ti --rm pecan/pecan-ed2 /bin/bash root@8a95ee8b6b47:/work# ldd /usr/local/bin/ed2.git | grep "not found" libmpi_usempif08.so.40 => not found libmpi_usempi_ignore_tkr.so.40 => not found libmpi_mpifh.so.40 => not found libmpi.so.40 => not found libgfortran.so.5 => not found Start the build container again (this is the number before the line FROM pecan/executor:latest, 7f23c6302130 in the example), and find the missing libraries listed above (for example libmpi_usempif08.so.40): > docker run --rm -ti 7f23c6302130 root@e716c63c031f:/src# dpkg -S libmpi_usempif08.so.40 libopenmpi3:amd64: /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi_usempif08.so.40.10.1 libopenmpi3:amd64: /usr/lib/x86_64-linux-gnu/libmpi_usempif08.so.40.10.1 libopenmpi3:amd64: /usr/lib/x86_64-linux-gnu/libmpi_usempif08.so.40 This shows the pages is libopenmpi3 that needs to be installed, do this for all missing packages, modify the Dockerfile and rebuild. Next time you run the ldd command there should be no more packages being listed. "],["docker-build-images.html", "25.6 Building and modifying images", " 25.6 Building and modifying images The only other section on this page is: Local development and testing with Docker For general use, it is sufficient to use the pre-built PEcAn images hosted on Docker Hub (see Docker quickstart). However, there are cases where it makes sense to re-build the Docker images locally. The following is a list of PEcAn-specific images and reasons why you would want to rebuild them locally: pecan/depends – Rebuild if: You modify the docker/depends/Dockerfile You introduce new system dependencies (i.e. things that need to be installed with apt-get) You introduce new R package dependencies, and you want those R package installations to be cached during future builds. For packages with fast build times, it may be fine to let them be installed as part of PEcAn’s standard build process (i.e. make). pecan/base – Rebuild if: You built a new version of pecan/depends (on which pecan/base depends) You modify the docker/base/Dockerfile You made changes to the PEcAn R package source code, the Makefile, or web/workflow.R. NOTE that changes to the web interface code affect pecan/web, not pecan/base pecan/executor – Rebuild if: You built a new version of pecan/base (on which pecan/executor depends) and/or, pecan/depends (on which pecan/base depends) You modified the docker/executor/Dockerfile You modified the RabbitMQ Python script (e.g. docker/receiver.py) pecan/web – Rebuild if you modified any of the following: docker/web/Dockerfile The PHP/HTML/JavaScript code for the PEcAn web interface in web/ (except web/workflow.R – that goes in pecan/base) docker/config.docker.php (the config.php file for Docker web instances) documentation/index_vm.html (the documentation HTML website) NOTE: Because changes to this code are applied instantly (i.e. do not require compilation or installation), a more effective way to do local development may be to mount the web/ or other relevant folders as a volume onto the pecan/web container. The easiest way to quickly re-build all of the images is using the docker.sh script in the PEcAn source code root directory. This script will build all of the docker images locally on your machine, and tag them as latest. This will not build the pecan/depends image by default because that takes considerably longer. However, you can force the script to build pecan/depends as well by setting the DEPEND environment variable to 1 (i.e. DEPEND=1 ./docker.sh). The following instructions provide details on how to build each image individually. To build an image locally, use the docker build command as described below. For more details, see docker build --help or the online Docker build documentation. First, in a terminal window, navigate (cd) into the PEcAn source code root directory. From there, the general syntax for building an image looks like the following: docker build -t pecan/<image name>:<image version> -f docker/base/Dockerfile.<image name> . For instance, to build a local version of the pecan/depends:latest image, you would run: docker build -t pecan/depends:latest -f docker/depends/Dockerfile . The breakdown of this command is as follows: docker build – This is the core command. The standard syntax is docker build [OPTIONS] <PATH>, where <PATH> refers to the directory to be used as the “build context”. The “build context” is the working directory assumed by the Dockerfiles. In PEcAn, this is always the PEcAn source code root directory, which allows Dockerfiles to use instructions such as COPY web/workflow.R /work/. In this example, the <PATH> is set to the current working directory, i.e. . because we are already in the PEcAn root directory. If you were located in a different directory, you would have to provide a path to the PEcAn source code root directory. Also, by default, docker build will look for a Dockerfile located at <PATH>/Dockerfile, but this is modified by the -f option described below. -t pecan/depends:latest – The -t/--tag option specifies how the image will be labeled. By default, Docker only defines unique image IDs, which are hexidecimal strings that are unintuitive and hard to remember. Tags are useful for referring to specific images in a human-readable way. Note that the same unique image can have multiple tags associated with it, so it is possible for, e.g. pecan/depends:latest, pecan/depends:custom, and even mypecan/somethingelse:20.0 to refer to the same exact image. To see a table of all local images, including their tags and IDs, run docker image ls. NOTE: PEcAn’s docker-compose.yml can be configured via the PECAN environment variable to point at different versions of PEcAn images. By default, it points to the :latest versions of all images. However, if you wanted to, for instance, build :local images corresponding to your local source code and then run that version of PEcAn, you would run: PECAN=local docker-compose -p pecan up -d This is an effective way to do local development and testing of different PEcAn versions, as described below. -f docker/depends/Dockerfile – The -f/--file tag is used to provide an alternative location and file name for the Dockerfile. 25.6.1 Local development and testing with Docker The following is an example of one possible workflow for developing and testing PEcAn using local Docker images. The basic idea is to mount a local version of the PEcAn source code onto a running pecan/executor image, and then send a special “rebuild” RabbitMQ message to the container to trigger the rebuild whenever you make changes. NOTE: All commands assume you are working from the PEcAn source code root directory. In the PEcAn source code directory, create a docker-compose.override.yml file with the following contents.: version: "3" services: executor: volumes: - .:/pecan This will mount the current directory . to the /pecan directory in the executor container. The special docker-compose.override.yml file is read automatically by docker-compose and overrides or extends any instructions set in the original docker-compose.yml file. It provides a convenient way to host server-specific configurations without having to modify the project-wide (and version-controlled) default configuration. For more details, see the Docker Compose documentation. Update your PEcAn Docker stack with docker-compose up -d. If the stack is already running, this should only restart your executor instance while leaving the remaining containers running. To update to the latest local code, run ./scripts/docker_rebuild.sh. Under the hood, this uses curl to post a RabbitMQ message to a running Docker instance. By default, the scripts assumes that username and password are both guest and that the RabbitMQ URL is http://rabbitmq.pecan.localhost/. All of these can be customized by setting the environment variables RABBITMQ_USER, RABBITMQ_PASSWORD, and RABBITMQ_URL, respectively (or running the script prefixed with those variables, e.g. RABBITMQ_USER=carya RABBITMQ_PASSWORD=illinois ./scripts/docker_rebuild.sh). This step can be repeated whenever you want to trigger a rebuild of the local code. NOTE: The updates with this workflow are specific to the running container session; restarting the executor container will revert to the previous versions of the installed packages. To make persistent changes, you should re-build the pecan/base and pecan/executor containers against the current version of the source code. NOTE: The mounted PEcAn source code directory includes everything in your local source directory, including installation artifacts used by make. This can lead to two common issues: - Any previous make cache files (stuff in the .install, .docs, etc. directories) persist across container instances, even though the installed packages may not. To ensure a complete build, it’s a good idea to run make clean on the host machine to remove these artifacts. - Similarly, any installation artifacts from local builds will be carried over to the build. In particular, be wary of packages with compiled code, such as modules/rtm (PEcAnRTM) – the compiled .o, .so, .mod, etc. files from compilation of such packages will carry over into the build, which can cause conflicts if the package was also built locally. The docker-compose.override.yml is useful for some other local modifications. For instance, the following adds a custom ED2 “develop” model container. services: # ... ed2devel: image: pecan/model-ed2-develop:latest build: context: ../ED2 # Or wherever ED2 source code is found networks: - pecan depends_on: - rabbitmq volumes: - pecan:/data restart: unless-stopped Similarly, this snippet modifies the pecan network to use a custom IP subnet mask. This is required on the PNNL cluster because its servers’ IP addresses often clash with Docker’s default IP mask. networks: pecan: ipam: config: - subnet: 10.17.1.0/24 "],["docker-troubleshooting.html", "25.7 Troubleshooting Docker", " 25.7 Troubleshooting Docker 25.7.1 “Package not available” while building images PROBLEM: Packages fail to install while building pecan/depends and/or pecan/base with an error like the following: Installing package into ‘/usr/local/lib/R/site-library’ (as ‘lib’ is unspecified) Warning: unable to access index for repository <URL>: cannot open URL '<URL>' Warning message: package ‘<PACKAGE>’ is not available (for R version 3.5.1) CAUSE: This can sometimes happen if there are problems with the RStudio Package manager, which is the default repository for the rocker/tidyverse containers. See GitHub issues rocker-org/rocker-versioned#102 and #58. WORKAROUND: Add the following line to the depends and/or base Dockerfiles before (i.e. above) any commands that install R packages (e.g. Rscript -e \"install.packages(...)\"): RUN echo "options(repos = c(CRAN = 'https://cran.rstudio.org'))" >> /usr/local/lib/R/etc/Rprofile.site This will set the default repository to the more reliable (albeit, more up-to-date; beware of breaking package changes!) RStudio CRAN mirror. Then, build the image as usual. "],["docker-migrate.html", "25.8 Migrating PEcAn from VM to Docker", " 25.8 Migrating PEcAn from VM to Docker This document assumes you have read through the Introduction to Docker as well as Docker quickstart and have docker running on the VM. This document will slowly replace each of the components with the appropriate docker images. At then end of this document you should be able to use the docker-compose command to bring up the full docker stack as if you had started with this origianally. 25.8.1 Running BETY as a docker container This will replace the BETY application running on the machine with a docker image. This will assume you still have the database running on the local machine and the only thing we replace is the BETY application. If you are running systemd (Ubuntu 16.04 or Centos 7) you can copy the following file to /etc/systemd/system/bety.service (replace LOCAL_SERVER=99 with your actual server). If you have postgres running on another server replace 127.0.0.1 with the actual ip address of the postgres server. [Unit] Description=BETY container After=docker.service [Service] Restart=always ExecStart=/usr/bin/docker run -t --rm --name bety --add-host=postgres:127.0.0.1 --network=host --env RAILS_RELATIVE_URL_ROOT=/bety --env LOCAL_SERVER=99 pecan/bety ExecStop=/usr/bin/docker stop -t 2 bety [Install] WantedBy=local.target At this point we can enable the bety service (this only needs to be done once). First we need to tell systemd a new service is available using systemctl daemon-reload. Next we enable the BETY service so it will restart automatically when the machine reboots, using systemctl enable bety. Finally we can start the BETY service using systemctl start bety. At this point BETY is running as a docker container on port 8000. You can see the log messages using journalctl -u bety. Next we need to modify apache configuration files. The file /etc/apache2/conf-enabled/bety.conf will be replaced with the following content: ProxyPass /bety/ http://pecan.localhost/bety/ ProxyPassReverse /bety/ http://pecan.localhost/bety/ RedirectMatch permanent ^/bety$ /bety/ Once this modified we can restart apache using systemctl restart apache2. At this point BETY is running in a container and is accessable trough the webserver at http://server/bety/. To upgrade to a new version of BETY you can now use the docker commands. You can use the following commands to stop BETY, pull the latest image down, migrate the database (you made a backup correct?) and start BETY again. systemctl stop bety docker pull pecan/bety:latest docker run -ti --rm --add-host=postgres:127.0.0.1 --network=host --env LOCAL_SERVER=99 pecan/bety migrate systemctl start bety Once you are satisfied with the migration of BETY you can remove the bety folder as well as any ruby binaries you have installed. "],["pecan-api.html", "25.9 The PEcAn Docker API", " 25.9 The PEcAn Docker API If you have a running instance of Dockerized PEcAn (or other setup where PEcAn workflows are submitted via RabbitMQ), you have the option of running and managing PEcAn workflows using the pecanapi package. For more details, see the pecanapi package vignette and function-level documentation. What follows is a lightning introduction. 25.9.0.1 Installation The package can be installed directly from GitHub via devtools::install_github: devtools::install_github("pecanproject/pecan/api@develop") 25.9.0.2 Creating and submitting a workflow With pecanapi, creating a workflow, submitting it to RabbitMQ, monitoring its progress, and processing its output can all be accomplished via an R script. Start by loading the package (and the magrittr package, for the %>% pipe operator). library(pecanapi) library(magrittr) Set your PEcAn database user ID, and create a database connection object, which will be used for database operations throughout the workflow. options(pecanapi.user_id = 99000000002) con <- DBI::dbConnect( drv = RPostgres::Postgres(), user = "bety", password = "bety", host = "localhost", port = 5432 ) Find model and site IDs for the site and model you want to run. model_id <- get_model_id(con, "SIPNET", "136") all_umbs <- search_sites(con, "umbs%disturbance") site_id <- subset(all_umbs, !is.na(mat))[["id"]] Insert a new workflow into the PEcAn database, and extract its ID. workflow <- insert_new_workflow(con, site_id, model_id, start_date = "2004-01-01", end_date = "2004-12-31") workflow_id <- workflow[["id"]] Pull all of this information together into a settings list object. settings <- list() %>% add_workflow(workflow) %>% add_database() %>% add_pft("temperate.deciduous") %>% add_rabbitmq(con = con) %>% modifyList(list( meta.analysis = list(iter = 3000, random.effects = list(on = FALSE, use_ghs = TRUE)), run = list(inputs = list(met = list(source = "CRUNCEP", output = "SIPNET", method = "ncss"))), ensemble = list(size = 1, variable = "NPP") )) Submit the workflow via RabbitMQ, and monitor its progress in the R process. submit_workflow(settings) watch_workflow(workflow_id) Use THREDDS to access and analyze the output. sipnet_out <- ncdf4::nc_open(run_dap(workflow_id, "2004.nc")) gpp <- ncdf4::ncvar_get(sipnet_out, "GPP") time <- ncdf4::ncvar_get(sipnet_out, "time") ncdf4::nc_close(sipnet_out) plot(time, gpp, type = "l") "],["rabbitmq.html", "25.10 RabbitMQ", " 25.10 RabbitMQ This section provides additional details about how PEcAn uses RabbitMQ to manage communication between its Docker containers. In PEcAn, we use the Python pika client to retrieve messages from RabbitMQ. The PEcAn.remote library has convenience functions that wrap the API to post and read messages. The executor and models use the python version of the RabbitMQ scripts to retrieve messages, and launch the appropriate code. 25.10.1 Producer – PEcAn.remote::rabbitmq_post_message The PEcAn.remote::rabbitmq_post_message function allows you to post messages to RabbitMQ from R. In the RabbitMQ documentation, it is known as a “producer”. It takes the body of the message to be posted and will return any output generated when the message is posted to the approriate queue. The function has three required arguments and two optional ones: uri – This is the URI used to connect to RabbitMQ, it will have the form amqp://username:password\\@server:5672/vhost. Most containers will have this injected using the environment variable RABBITMQ_URI. queue – The queue to post the message on, this is either pecan for a workflow to be exected, the name of the model and version to be executed, for example SIPNET_r136. message – The actual message to be send, this is of type list and will be converted to a json string representation. prefix – The code will talk to the rest api, this is the prefix of the rest api. In the case of the default docker-compse file, this will be /rabbitmq and will be injected as RABBITMQ_PREFIX. port – The code will talk to the rest api, this is the port of the rest api. In the case of the default docker-compse file, this will be 15672 and will be injected as RABBITMQ_PORT. The PEcAn.remote::start_rabbitmq function is a wrapper for this function that provides an easy way to post a folder message to RabbitMQ. 25.10.2 Consumer – receiver.py The receiver.py script runs like a daemon, constantly listening for messages. In the RabbitMQ documentation, it is known as a “consumer”. In PEcAn, you can tell that it is ready to receive messages if the corresponding logs (e.g. docker-compose logs executor) show the following message: [*] Waiting for messages. To exit press CTRL+C. Our reciever is configured by three environment variables: RABBITMQ_URI – This defines the URI where RabbitMQ is running. See corresponding argument in the producer RABBITMQ_QUEUE – This is the name of the queue on which the consumer will listen for messages, just as in the producer. APPLICATION – This specifies the name (including the path) of the default executable to run when receiving a message. At the moment, it should be an executable that runs in the directory specified by the message’s folder variable. In the case of PEcAn models, this is usually ./job.sh, such that the folder corresponds to the run directory associated with a particular runID (i.e. where the job.sh is located). For the PEcAn workflow itself, this is set to R CMD BATCH workflow.R, such that the folder is the root directory of the workflow (in the executor Docker container, something like /data/workflows/PEcAn_<workflowID>). This default executable is overridden if the message contains a custom_application key. If included, the string specified by the custom_application key will be run as a command exactly as is on the container, from the directory specified by folder. For instance, in the example below, the container will print “Hello there!” instead of running its default application. {"custom_application": "echo 'Hello there!'", "folder": "/path/to/my/dir"} NOTE that in RabbitMQ messages, the folder key is always required. 25.10.3 RabbitMQ and the PEcAn web interface RabbitMQ is configured by the following variables in config.php: $rabbitmq_host – The RabbitMQ server hostname (default: rabbitmq, because that is the name of the rabbitmq service in docker-compose.yml) $rabbitmq_port – The port on which RabbitMQ listens for messages (default: 5672) $rabbitmq_vhost – The path of the RabbitMQ Virtual Host (default: /). $rabbitmq_queue – The name of the RabbitMQ queue associated with the PEcAn workflow (default: pecan) $rabbitmq_username – The RabbitMQ username (default: guest) $rabbitmq_password – The RabbitMQ password (default: guest) In addition, for running models via RabbitMQ, you will also need to add an entry like the following to the config.php $hostlist: $hostlist=array($fqdn => array("rabbitmq" => "amqp://guest:guest@rabbitmq/%2F"), ...) This will set the hostname to the name of the current machine (defined by the $fqdn variable earlier in the config.php file) to an array with one entry, whose key is rabbitmq and whose value is the RabbitMQ URI (amqp://...). These values are converted into the appropriate entries in the pecan.xml in web/04-runpecan.php. 25.10.4 RabbitMQ in the PEcAn XML RabbitMQ is a special case of remote execution, so it is configured by the host node. An example RabbitMQ configuration is as follows: <host> <rabbitmq> <uri>amqp://guest:guest@rabbitmq/%2F</uri> <queue>sipnet_136</queue> </rabbitmq> </host> Here, uri and queue have the same general meanings as described in “producer”. Note that queue here refers to the target model. In PEcAn, RabbitMQ model queues are named as MODELTYPE_REVISION, so the example above refers to the SIPNET model version 136. Another example is ED2_git, referring to the latest git version of the ED2 model. 25.10.5 RabbitMQ configuration in Dockerfiles As described in the “consumer” section, our standard RabbitMQ receiver script is configured using three environment variables: RABBITMQ_URI, RABBITMQ_QUEUE, and APPLICATION. Therefore, configuring a container to work with PEcAn’s RabbitMQ instance requires setting these three variables in the Dockerfile using an ENV statement. For example, this excerpt from docker/base/Dockerfile.executor (for the pecan/executor image responsible for the PEcAn workflow) sets these variables as follows: ENV RABBITMQ_URI="amqp://guest:guest@rabbitmq/%2F" \\ RABBITMQ_QUEUE="pecan" \\ APPLICATION="R CMD BATCH workflow.R" Similarly, this excerpt from docker/models/Dockerfile.sipnet (which builds the SIPNET model image) is a typical example for a model image. Note the use of ARG here to specify a default version model version of 136 while allowing this to be configurable (via --build-arg MODEL_VERSION=X) at build time: ARG MODEL_VERSION=136 ENV APPLICATION="./job.sh" \\ MODEL_TYPE="SIPNET" \\ MODEL_VERSION="${MODEL_VERSION}" ENV RABBITMQ_QUEUE="${MODEL_TYPE}_${MODEL_VERSION}" WARNING: Dockerfile environment variables set via ENV are assigned all at once; they do not evaluate successively, left to right. Consider the following block: # Don't do this! ENV MODEL_TYPE="SIPNET" \\ MODEL_VERSION=136 \\ RABBITMQ_QUEUE=${MODEL_TYPE}_${MODEL_VERSION} # <- Doesn't know about MODEL_TYPE or MODEL_VERSION! In this block, the expansion for setting RABBITMQ_QUEUE is not aware of the current values of MODEL_TYPE or MODEL_VERSION, and will therefore be set incorrectly to just _ (unless they have been set previously, in which case it will be aware only of their earlier values). As such, variables depending on other variables must be set in a separate, subsequent ENV statement than the variables they depend on. 25.10.6 Case study: PEcAn web interface The following describes in general terms what happens during a typical run of the PEcAn web interface with RabbitMQ. The user initializes all containers with docker-compose up. All the services that interact with RabbitMQ (executor and all models) run receiver.py in the foreground, waiting for messages to tell them what to do. The user browses to http://pecan.localhost/pecan/ and steps through the web interface. All the pages up to the 04-runpecan.php run on the web container, and are primarily for setting up the pecan.xml file. Once the user starts the PEcAn workflow at 04-runpecan.php, the underlying PHP code connects to RabbitMQ (based on the URI provided in config.php) and posts the following message to the pecan queue: {"folder": "/workflows/PEcAn_WORKFLOWID", "workflowid": "WORKFLOWID"} The executor service, which is listening on the pecan queue, hears this message and executes its APPLICATION (R CMD BATCH workflow.R) in the working directory specified in the message’s folder. The executor service then performs the pre-execution steps (e.g. trait meta-analysis, conversions) itself. Then, to actually execute the model, executor posts the following message to the target model’s queue: {"folder": "/workflows/PEcAn_WORKFLOWID/run/RUNID"} The target model service, which is listening on its dedicated queue, hears this message and runs its APPLICATION, which is job.sh, in the directory indicated by the message. Upon exiting (normally), the model service writes its status into a file called rabbitmq.out in the same directory. The executor container continuously looks for the rabbitmq.out file as an indication of the model run’s status. Once it sees this file, it reads the status and proceeds with the post-execution parts of the workflow. (NOTE that this isn’t perfect. If the model running process exits abnormally, the rabbitmq.out file may not be created, which can cause the executor container to hang. If this happens, the solution is to restart the executor container with docker-compose restart executor). "],["pecan-remote.html", "26 Remote execution with PEcAn", " 26 Remote execution with PEcAn Remote execution allows the user to leverage the power and storage of high performance computing clusters, AWS instances, or specially configured virtual machines, but without leaving their local working environment. PEcAn uses remote execution primarily to run ecosystem models. The infrastructure for remote execution lives in the PEcAn.remote package (base/remote in the PEcAn repository). This section describes the following: Checking capabilities to connect to the remote machine correctly: Basics of command line SSH SSH authentication with keys and passwords Basics of SSH tunnels, and how they are used in PEcAn Description of PEcAn related tools that control remote execution Basic remote execution R functions in PEcAn.remote SETUP- Configuration Files and settings Remote model execution configuration in the pecan.xml and config.php Additional information about preparing remote servers for execution "],["basics-of-ssh.html", "26.1 Basics of SSH", " 26.1 Basics of SSH All of the PEcAn remote infrastructure depends on the system ssh utility, so it’s important to make sure this works before attempting the advanced remote execution functionality in PEcAn. To connect to a remote server interactively, the command is simply: ssh <username>@<hostname> For instance, my connection to the BU shared computing cluster looks like: ssh ashiklom@geo.bu.edu It will prompt me for my BU password, and, if successful, will drop me into a login shell on the remote machine. Alternatively to the login shell, ssh can be used to execute arbitrary code, whose output will be returned exactly as it would if you ran the command locally. For example, the following: ssh ashiklom@geo.bu.edu pwd will run the pwd command, and return the path to my home directory on the BU SCC. The more advanced example below will run some simple R code on the BU SCC and return the output as if it was run locally. ssh ashiklom@geo.bu.edu Rscript -e "seq(1, 10)" "],["ssh-authentication-password-vs.-ssh-key.html", "26.2 SSH authentication – password vs. SSH key", " 26.2 SSH authentication – password vs. SSH key Because this server uses passwords for authentication, this command will then prompt me for my password. An alternative to password authentication is using SSH keys. Under this system, the host machine (say, your laptop, or the PEcAn VM) has to generate a public and private key pair (using the ssh-keygen command). The private key (by default, a file in ~/.ssh/id_rsa) lives on the host machine, and should never be shared with anyone. The public key will be distributed to any remote machines to which you want the host to be able to connect. On each remote machine, the public key should be added to a list of authorized keys located in the ~/.ssh/authorized_keys file (on the remote machine). The authorized keys list indicates which machines (technically, which keys – a single machine, and even a single user, can have many keys) are allowed to connect to it. This is the system used by all of the PEcAn servers (pecan1, pecan2, test-pecan). "],["ssh-tunneling.html", "26.3 SSH tunneling", " 26.3 SSH tunneling SSH authentication can be more advanced than indicated above, especially on systems that require dual authentication. Even simple password-protection can be tricky in scripts, since (by design) it is fairly difficult to get SSH to accept a password from anything other than the raw keyboard input (i.e. SSH doesn’t let you pass passwords as input or arguments, because this exposes your password as plain text). A convenient and secure way to follow SSH security protocol, but prevent having to go through the full authentication process every time, is to use SSH tunnels (or “sockets”, which are effectively synonymous). Essentially, an SSH socket is a read- and write-protectected file that contains all of the information about an SSH connection. To create an SSH tunnel, use a command like the following: ssh -n -N -f -o ControlMaster=yes -S /path/to/socket/file <username>@<hostname> If appropriate, this will prompt you for your password (if using password authentication), and then will drop you back to the command line (thanks to the -N flag, which runs SSH without executing a command, the -f flag, which pushes SSH into the background, and the -n flag, which prevents ssh from reading any input). It will also create the file /path/to/socket/file. To use this socket with another command, use the -S /path/to/file flag, pointing to the same tunnel file you just created. ssh -S /path/to/socket/file <hostname> <optional command> This will let you access the server without any sort of authentication step. As before, if <optional command> is blank, you will be dropped into an interactive shell on the remote, or if it’s a command, that command will be executed and the output returned. To close a socket, use the following: ssh -S /path/to/socket/file <hostname> -O exit This will delete the socket file and close the connection. Alternatively, a scorched earth approach to closing the SSH tunnel if you don’t remember where you put the socket file is something like the following: pgrep ssh # See which processes will be killed pkill ssh # Kill those processes …which will kill all user processes called ssh. To automatically create tunnels following a specific pattern, you can add the following to your ~/.ssh/config Host <hostname goes here> ControlMaster auto ControlPath /tmp/%r@%h:%p For more information, see man ssh. "],["ssh-tunnels-and-pecan.html", "26.4 SSH tunnels and PEcAn", " 26.4 SSH tunnels and PEcAn Many of the PEcAn.remote functions assume that a tunnel is already open. If working from the web interface, the tunnel will be opened for you by some under-the-hood PHP and Bash code, but if debugging or working locally, you will have to create the tunnel yourself. The best way to do this is to create the tunnel first, outside of R, as described above. (In the following examples, I’ll use my username ashiklom connecting to the test-pecan server with a socket stored in /tmp/testpecan. To follow along, replace these with your own username and designated server, respectively). ssh -nNf -o ControlMaster=yes -S /tmp/testpecan ashiklom@test-pecan.bu.edu Then, in R, create a host object, which is just a list containing the elements name (hostname) and tunnel (path to tunnel file). my_host <- list(name = "test-pecan.bu.edu", tunnel = "/tmp/testpecan") This host object can then be used in any of the remote execution functions. "],["basic-remote-execute-functions.html", "26.5 Basic remote execute functions", " 26.5 Basic remote execute functions The PEcAn.remote::remote.execute.cmd function runs a system command on a remote server (or on the local server, if host$name == \"localhost\"). x <- PEcAn.remote::remote.execute.cmd(host = my_host, cmd = "echo", args = "Hello world") x Note that remote.execute.cmd is similar to base R’s system2, in that the base command (in this case, echo) is passed separately from its arguments (\"Hello world\"). Note also that the output of the remote command is returned as a character. For R code, there is a special wrapper around remote.execute.cmd – PEcAn.remote::remote.execute.R, which runs R code (passed as a string) on a remote and returns the output. code <- " x <- 2:4 y <- 3:1 x ^ y " out <- PEcAn.remote::remote.execute.R(code = code, host = my_host) For additional functions related to remote file operations and other stuff, see the PEcAn.remote package documentation. "],["remote-model-execution-with-pecan.html", "26.6 Remote model execution with PEcAn", " 26.6 Remote model execution with PEcAn The workhorse of remote model execution is the PEcAn.workflow::start_model_runs function, which distributes execution of each run in a list of runs (e.g. multiple runs in an ensemble) to the local machine or a remote based on the configuration in the PEcAn settings. Broadly, there are three major types of model execution: Serialized (PEcAn.remote::start_serial) – This runs models one at a time, directly on the local machine or remote (i.e. same as calling the executables one at a time for each run). Via a queue system, (PEcAn.remote::start_qsub) – This uses a queue management system, such as SGE (e.g. qsub, qstat) found on the BU SCC machines, to submit jobs. For computationally intensive tasks, this is the recommended way to go. Via a model launcher script (PEcAn.remote::setup_modellauncher) – This is a highly customizable approach where task submission is controlled by a user-provided script (launcher.sh). "],["xml-configuration.html", "26.7 XML configuration", " 26.7 XML configuration The relevant section of the PEcAn XML file is the <host> block. Here is a minimal example from one of my recent runs: <host> <name>geo.bu.edu</name> <user>ashiklom</user> <tunnel>/home/carya/output//PEcAn_99000000008/tunnel/tunnel</tunnel> </host> Breaking this down: name – The hostname of the machine where the runs will be performed. Set it to localhost to run on the local machine. user – Your username on the remote machine (note that this may be different from the username on your local machine). tunnel – This is the tunnel file for the connection used by all remote execution files. The tunnel is created automatically by the web interface, but must be created by the user for command line execution. This configuration will run in serialized mode. To use qsub, the configuration is slightly more involved: <host> <name>geo.bu.edu</name> <user>ashiklom</user> <qsub>qsub -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash</qsub> <qsub.jobid>Your job ([0-9]+) .*</qsub.jobid> <qstat>qstat -j @JOBID@ || echo DONE</qstat> <tunnel>/home/carya/output//PEcAn_99000000008/tunnel/tunnel</tunnel> </host> The additional fields are as follows: qsub – The command used to submit jobs to the queue system. Despite the name, this can be any command used for any queue system. The following variables are available to be set here: @NAME@ – Job name to display @STDOUT@ – File to which stdout will be redirected @STDERR@ – File to which stderr will be redirected qsub.jobid – A regular expression, from which the job ID will be determined. This string will be parsed by R as jobid <- gsub(qsub.jobid, \"\\\\1\", output) – note that the first pattern match is taken as the job ID. qstat – The command used to check the status of a job. Internally, PEcAn will look for the DONE string at the end, so a structure like <some command indicating if any jobs are still running> || echo DONE is required. The @JOBID@ here is the job ID determined from the qsub.jobid parsing. Documentation for using the model launcher is currently unavailable. "],["configuration-for-pecan-web-interface.html", "26.8 Configuration for PEcAn web interface", " 26.8 Configuration for PEcAn web interface The config.php has a few variables that will control where the web interface can run jobs, and how to run those jobs. It is located in the /web directory and if you have not touched it yet it will be named as config.example.php. Rename it to ’config.php` and edit by folowing the following directions. These variables are $hostlist, $qsublist, $qsuboptions, and $SSHtunnel. In the near future $hostlist, $qsublist, $qsuboptions will be combined into a single list. $SSHtunnel : points to the script that creates an SSH tunnel. The script is located in the web folder and the default value of dirname(__FILE__) . DIRECTORY_SEPARATOR . \"sshtunnel.sh\"; most likely will work. $hostlist : is an array with by default a single value, only allowing jobs to run on the local server. Adding any other servers to this list will show them in the pull down menu when selecting machines, and will trigger the web page to be show to ask for a username and password for the remote execution (make sure to use HTTPS setup when asking for password to prevent it from being send in the clear). $qsublist : is an array of hosts that require qsub to be used when running the models. This list can include $fqdn to indicate that jobs on the local machine should use qsub to run the models. $qsuboptions : is an array that lists options for each machine. Currently it support the following options (see also [Run Setup] and look at the tags) array("geo.bu.edu" => array("qsub" => "qsub -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash", "jobid" => "Your job ([0-9]+) .*", "qstat" => "qstat -j @JOBID@ || echo DONE", "job.sh" => "module load udunits R/R-3.0.0_gnu-4.4.6", "models" => array("ED2" => "module load hdf5")) In this list qsub is the actual command line for qsub, jobid is the text returned from qsub, qstat is the command to check to see if the job is finished. job.sh and the value in models are additional entries to add to the job.sh file generated to run the model. This can be used to make sure modules are loaded on the HPC cluster before running the actual model. "],["running-pecan-code-for-remotely.html", "26.9 Running PEcAn code for remotely", " 26.9 Running PEcAn code for remotely You do not need to download PEcAn fully on your remote machine. You can compile and install the model specific code pieces of PEcAn on the cluster easily without having to install the full code base of PEcAn (and all OS dependencies). Use the git clone command to: devtools::install_github("pecanproject/pecan", subdir = 'base/utils') Next we need to install the model specific pieces, this is done almost the same (example for ED2): devtools::install_github("pecanproject/pecan", subdir = 'models/ed') This should install dependencies required. The following are some notes on how to install the model specifics on different HPC clusters* "],["special-case-geo.bu.html", "26.10 Special case: geo.bu.edu", " 26.10 Special case: geo.bu.edu Following modules need to be loaded: module load hdf5 udunits R/R-3.0.0_gnu-4.4.6 Next the following packages need to be installed, otherwise it will fall back on the older versions install site-library install.packages(c('udunits2', 'lubridate'), configure.args=c(udunits2='--with-udunits2-lib=/project/earth/packages/udunits-2.1.24/lib --with-udunits2-include=/project/earth/packages/udunits-2.1.24/include')) Finally to install support for both ED and SIPNET: devtools::install_github("pecanproject/pecan", subdir = 'base/utils') devtools::install_github("pecanproject/pecan", subdir = 'models/sipnet') devtools::install_github("pecanproject/pecan", subdir = 'models/ed') "],["data-assimilation-with-dart.html", "27 Data assimilation with DART", " 27 Data assimilation with DART In addition to the state assimilation routines found in the assim.sequential module, another approach for state data assimilation in PEcAn is through the DART workflow created by the DARES group in NCAR. This section gives a straight-forward explanation how to implement DART, focused on the technical aspects of the implementation. If there are any questions, feel free to send @Viskari an email (tt.viskari@gmail.com) or contacting DART support as they are quite awesome in helping people with problems. Also, if there are any suggestions on how to improve the wiki, please let me know. Running with current folders in PEcAn Currently the DART folders in PEcAn are that you can simply copy the structure there over a downloaded DART workflow and it should replace/add relevant files and folders. The most important step after that is to check and change the run paths in the following files: Path_name files in the work folders T_ED2IN file, as it indicates where the run results be written. advance_model.csh, as it indicates where to copy files from/to. Second thing is setting the state vector size. This is explained in more detail below, but essentially this is governed by the variable model_size in model_mod.f90. In addition to this, it should be changed in utils/F2R.f90 and R2F.f90 programs, which are responsible for reading and writing state variable information for the different ensembles. This also will be expanded below. Finally, the new state vector size should be updated for any other executable that runs it. Third thing needed are the initial condition and observation sequence files. They will always follow the same format and are explained in more detail below. Finally the ensemble size, which is the easiest to change. In the work subfolder, there is a file named input.nml. Simply changing the ensemble size there will set it for the run itself. Also remember that initial conditions file should have the equal amount of state vectors as there are ensemble members. Adjusting the workflow The central file for the actual workflow is advance_model.csh. It is a script DART calls to determine how the state vector changes between the two observation times and is essentially the only file one needs to change when changing state models or observations operators. The file itself should be commented to give a good idea of the flow, but beneath is a crude order of events. 1. Create a temporary folder to run the model in and copy/link required files in to it. 2. Read in the state vector values and times from DART. Here it is important to note that the values will be in binary format, which need to be read in by a Fortran program. In my system, there is a program called F2R which reads in the binary values and writes out in ascii form the state vector values as well as which ED2 history files it needs to copy based on the time stamps. 3. Run the observation operator, which writes the state vector state in to the history files and adjusts them if necessary. 4. Run the program. 5. Read the new state vector values from output files. 6. Convert the state vector values to the binary. In my system, this is done by the program R2F. Initial conditions file The initial conditions file, commonly named filter_ics although you can set it to something else in input.nml, is relatively simple in structure. It has one sequence repeating over the number of ensemble members. First line contains two times: Seconds and days. Just use one of them in this situation, but it has to match the starting time given in input.nml. After that each line should contain a value from the state vector in the order you want to treat them. R functions filter_ics.R and B_filter_ics.R in the R folder give good examples of how to create these. Observations files The file which contains the observations is commonly known as obs_seq.out, although again the name of the file can be changed in input.nml. The structure of the file is relatively straight-forward and the R function ObsSeq.R in the R subfolder has the write structure for this. Instead of writing it out here, I want to focus on a few really important details in this file. Each observations will have a time, a value, an uncertainty, a location and a kind. The first four are self-explanatory, but the kind is really important, but also unfortunately really easy to misunderstand. In this file, the kind does not refer to a unit or a type of observation, but which member of the state vector is this observation of. So if the kind was, for example, 5, it would mean that it was of the fifth member of the state vector. However, if the kind value is positive, the system assumes that there is some sort of an operator change in comparing the observation and state vector value which is specified in a subprogram in model_mod.f90. So for an direct identity comparison between the observation and the state vector value, the kind needs to be negative number of the state vector component. Thus, again if the observation is of the fifth state vector value, the kind should be set as -5. Thus it is recommendable that the state vector values have already been altered to be comparable with the observations. As for location, there are many ways to set in DART and the method needs to be chosen when compiling the code by giving the program which of the location mods it is to use. In our examples we used a 1-dimensional location vector with scaled values between 0 and 1. For future it makes sense to switch to a 2 dimensional long- and lat-scale, but for the time being the location does not impact the system a lot. The main impact will be if the covariances will be localized, as that will be decided on their locations. State variable vector in DART Creating/adjusting a state variable vector in DART is relatively straight-forward. Below are listed the steps to specify a state variable vector in DART. I. For each specific model, there should be an own folder within the DART root models folder. In this folder there is a model_mod.f90, which contains the model specific subroutines necessary for a DART run. At the beginning of this file there should be the following line: integer, parameter :: model_size = [number] The number here should be the number of variables in the vector. So for example if there were three state variables, then the line should look like this: integer, parameter :: model_size = 3 This number should also be changed to match with any of the other executables called during the run as indicated by the list above. In the DART root, there should be a folder named obs_kind, which contains a file called DEFAULT_obs_kind_mod.F90. It is important to note that all changes should be done to this file instead of obs_kind_mod.f90, as during compilation DART creates obs_kind_mod.f90 from DEFAULT_obs_kind_mod.F90. This program file contains all the defined observation types used by DART and numbers them for easier reference later. Different types are classified according to observation instrument or relevant observation phenomenon. Adding a new type only requires finding an unused number and starting a new identifying line with the following: integer, parameter, public :: & KIND_… Note that the observation kind should always be easy to understand, so avoid using unnecessary acronyms. For example, when adding an observation type for Leaf Area Index, it would look like below: integer, parameter, public :: & KIND_LEAF_AREA_INDEX = [number] In the DART root, there should be a folder named obs_def, which contains several files starting with obs_def_. There files contain the different available observation kinds classified either according to observation instrument or observable system. Each file starts with the line ! BEGIN DART PREPROCESS KIND LIST And end with line ! END DART PREPROCESS KIND LIST The lines between these two should contain ! The desired observation reference, the observation type, COMMON_CODE. For example, for observations relating to phenology, I have created a file called obs_def_phen_mod.f90. In this file I define the Leaf Area Index observations in the following way. ! BEGIN DART PREPROCESS KIND LIST ! LAI, TYPE_LEAF_AREA_INDEX, COMMON_CODE ! END DART PREPROCESS KIND LIST Note that the exclamation marks are necessary for the file. In the model specific folder, in the work subfolder there is a namelist file input.nml. This contains all the run specific information for DART. In it, there is a subtitle &preprocess, under which there is a line input_files = ‘….’ This input_files sections must be set to refer to the obs_def file created in step III. The input files can contain references to multiple obs_def files if necessary. As an example, the reference to the obs_def_phen_mod.f90 would look like input_files = ‘../../../obs_def/obs_def_phen_mod.f90’ V. Finally, as an optional step, the different values in state vector can be typed. In model_mod, referred to in step I, there is a subroutine get_state_meta_data. In it, there is an input variable index_in, which refers to the vector component. So for instance for the second component of the vector index_in would be 2. If this is done, the variable kind has to be also included at the beginning of the model_mod.f90 file, at the section which begins use obs_kind_mod, only :: The location of the variable can be set, but for a 0-dimensional model we are discussing here, this is not necessary. Here, though, it is possible to set the variable types by including the following line if(index_in .eq. [number]) var_type = [One of the variable kinds set in step II] If the length of the state vector is changed, it is important that the script ran with DART produces a vector of that length. Change appropriately if necessary. After these steps, DART should be able to run with the state vector of interest. "],["miscellaneous.html", "28 Miscellaneous ", " 28 Miscellaneous "],["todo.html", "28.1 TODO", " 28.1 TODO Add list of developers "],["using-the-pecan-download_file-function.html", "28.2 Using the PEcAn download_file() function", " 28.2 Using the PEcAn download_file() function download_file(url, destination_file, method) This custom PEcAn function works together with the base R function download.file (https://stat.ethz.ch/R-manual/R-devel/library/utils/html/download.file.html). However, it provides expanded functionality to generalize the use for a broad range of environments. This is because some computing environments are behind a firewall or proxy, including FTP firewalls. This may require the use of a custom FTP program and/or initial proxy server authentication to retrieve the files needed by PEcAn (e.g. meteorology drivers, other inputs) to run certain model simulations or tools. For example, the Brookhaven National Laboratory (BNL) requires an initial connection to a FTP proxy before downloading files via FTP protocol. As a result, the computers running PEcAn behind the BNL firewall (e.g. https://modex.bnl.gov) use the ncftp cleint (http://www.ncftp.com/) to download files for PEcAn because the base options with R::base download.file() such as curl, libcurl which don’t have the functionality to provide credentials for a proxy or even those such as wget which do but don’t easily allow for connecting through a proxy server before downloading files. The current option for use in these instances is ncftp, specifically ncftpget Examples: HTTP download_file("http://lib.stat.cmu.edu/datasets/csb/ch11b.txt","~/test.download.txt") FTP download_file("ftp://ftp.cdc.noaa.gov/Datasets/NARR/monolevel/pres.sfc.2000.nc", "~/pres.sfc.2000.nc") customizing to use ncftp when running behind an FTP firewall (requires ncftp to be installed and availible) download_file("ftp://ftp.cdc.noaa.gov/Datasets/NARR/monolevel/pres.sfc.2000.nc", "~/pres.sfc.2000.nc", method=""ncftpget") On modex.bnl.gov, the ncftp firewall configuration file (e.g. ~/.ncftp/firewall) is configured as: firewall-type=1 firewall-host=ftpgateway.sec.bnl.local firewall-port=21 which then allows for direct connection through the firewall using a command like: ncftpget ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-fortran-4.4.4.tar.gz To allow the use of ncftpget from within the download.file() function you need to set your R profile download.ftp.method option in your options list. To see your current R options run options() from R cmd, which should look something like this: > options() $add.smooth [1] TRUE $bitmapType [1] "cairo" $browser [1] "/usr/bin/xdg-open" $browserNLdisabled [1] FALSE $CBoundsCheck [1] FALSE $check.bounds [1] FALSE $citation.bibtex.max [1] 1 $continue [1] "+ " $contrasts unordered ordered "contr.treatment" "contr.poly" In order to set your download.ftp.method option you need to add a line such as # set default FTP options(download.ftp.method = "ncftpget") In your ~/.Rprofile. On modex at BNL we have set the global option in /usr/lib64/R/etc/Rprofile.site. Once this is done you should be able to see the option set using this command in R: > options("download.ftp.method") $download.ftp.method [1] "ncftpget" "],["faq.html", "29 FAQ", " 29 FAQ "],["pecan-project-used-in-courses.html", "30 PEcAn Project Used in Courses ", " 30 PEcAn Project Used in Courses "],["university-classes.html", "30.1 University classes", " 30.1 University classes 30.1.1 GE 375 - Environmental Modeling - Spring 2013, 2014 (Mike Dietze, Boston University) The final “Case Study: Terrestrial Ecosystem Models” is a PEcAn-based hands-on activity. Each class has been 25 students. GE 585 - Ecological forecasting Fall 2013 (Mike Dietze, Boston University) "],["summer-courses-workshops.html", "30.2 Summer Courses / Workshops", " 30.2 Summer Courses / Workshops 30.2.1 Annual summer course in flux measurement and advanced modeling (Mike Dietze, Ankur Desai) Niwot Ridge, CO About 1/3 lecture, 2/3 hands-on (the syllabus is actually wrong as it list the other way around). Each class has 24 students. 2013 Syllabus see Tuesday Week 2 Data Assimilation lectures and PEcAn demo and the Class projects and presentations on Thursday and Friday. (Most students use PEcAn for their group projects. 2014 will be the third year that PEcAn has been used for this course. 30.2.2 Assimilating Long-Term Data into Ecosystem Models: Paleo-Ecological Observatory Network (PalEON) Project Here is a link to the course: https://www3.nd.edu/~paleolab/paleonproject/summer-course/ This course uses the same demo as above, including collecting data in the field and assimilating it (part 3) 30.2.3 Integrating Evidence on Forest Response to Climate Change: Physiology to Regional Abundance http://blue.for.msu.edu/macrosystems/workshop May 13-14, 2013 Session 4: Integrating Forest Data Into Ecosystem Models 30.2.4 Ecological Society of America meetings Workshop: Combining Field Measurements and Ecosystem Models "],["selected-publications.html", "30.3 Selected Publications", " 30.3 Selected Publications Dietze, M.C., D.S LeBauer, R. Kooper (2013) On improving the communication between models and data. Plant, Cell, & Environment doi:10.1111/pce.12043 LeBauer, D.S., D. Wang, K. Richter, C. Davidson, & M.C. Dietze. (2013). Facilitating feedbacks between field measurements and ecosystem models. Ecological Monographs. doi:10.1890/12-0137.1 "],["package-dependencies.html", "31 Package Dependencies ", " 31 Package Dependencies "],["executive-summary-what-to-usually-do.html", "31.1 Executive Summary: What to usually do", " 31.1 Executive Summary: What to usually do When you’re editing one PEcAn package and want to use a function from any other R package (including other PEcAn packages), the standard method is to add the other package to the Imports: field of your DESCRIPTION file, spell the function in fully namespaced form (pkg::function()) everywhere you call it, and be done. There are a few cases where this isn’t enough, but they’re rarer than you think. The rest of this section mostly deals with the exceptions to this rule and why not to use them when they can be avoided. "],["big-picture-whats-possible-to-do.html", "31.2 Big Picture: What’s possible to do", " 31.2 Big Picture: What’s possible to do To make one PEcAn package use functionality from another R package (including other PEcAn packages), you must do at least one and up to four things in your own package. Always, declare which packages your package depends on, so that R can install them as needed when someone installs your package and so that human readers can understand what additional functionality it uses. Declare dependencies by manually adding them to your package’s DESCRIPTION file. Sometimes, import functions from the dependency package into your package’s namespace, so that your functions know where to find them. This is only sometimes necessary, because you can usually use :: to call functions without importing them. Import functions by writing Roxygen @importFrom statements and do not edit the NAMESPACE file by hand. Rarely, load dependency code into the R environment, so that the person using your package can use it without loading it separately. This is usually a bad idea, has caused many subtle bugs, and in PEcAn it should only be used when unavoidable. When unavoidable, prefer requireNamespace(... quietly = TRUE) over Depends: or require() or library(). Only if your dependency relies on non-R tools, install any components that R won’t know how to find for itself. These components are often but not always identifiable from a SystemRequirements field in the dependency’s DESCRIPTION file. The exact installation procedure will vary case by case and from one operating system to another, and for PEcAn the key point is that you should skip this step until it proves necessary. When it does prove necessary, edit the documentation for your package to include advice on installing the dependency components, then edit the PEcAn build and testing scripts as needed so that they follow your advice. The advice below about each step is written specifically for PEcAn, although much of it holds for R packages in general. For more details about working with dependencies, start with Hadley Wickham’s R packages and treat the CRAN team’s Writing R Extensions as the final authority. "],["declaring-dependencies-depends-suggests-imports.html", "31.3 Declaring Dependencies: Depends, Suggests, Imports", " 31.3 Declaring Dependencies: Depends, Suggests, Imports List all dependencies in the DESCRIPTION file. Every package that is used by your package’s code must appear in exactly one of the sections Depends, Imports, or Suggests. Please list packages in alphabetical order within each section. R doesn’t care about the order, but you will later when you’re trying to check whether this package uses a particular dependency. Imports is the correct place to declare most PEcAn dependencies. This ensures that they get installed, but does not automatically import any of their functions – Since PEcAn style prefers to mostly use :: instead of importing, this is what we want. Depends is, despite the name, usually the wrong place to declare PEcAn dependencies. The only difference between Depends and Imports is that when the user attaches your packages to their own R workspace (e.g. using library(\"PEcAn.yourpkg\")), the packages in Depends are attached as well. Notice that a call like PEcAn.yourpkg::yourfun() will not attach your package or its dependencies, so your code still needs to import or ::-qualify all functions from packages listed in Depends. In short, Depends is not a shortcut, is for user convenience not developer convenience, and makes it easy to create subtle bugs that appear to work during interactive test sessions but fail when run from scripts. As the R extensions manual puts it (emphasis added): This [Imports and Depends] scheme was developed before all packages had namespaces (R 2.14.0 in October 2011), and good practice changed once that was in place. Field ‘Depends’ should nowadays be used rarely, only for packages which are intended to be put on the search path to make their facilities available to the end user (and not to the package itself).” The Suggests field can be used to declare dependencies on packages that make your package more useful but are not completely essential. Again from the R extensions manual: The Suggests field uses the same syntax as Depends and lists packages that are not necessarily needed. This includes packages used only in examples, tests or vignettes (see Writing package vignettes), and packages loaded in the body of functions. E.g., suppose an example from package foo uses a dataset from package bar. Then it is not necessary to have bar use foo unless one wants to execute all the examples/tests/vignettes: it is useful to have bar, but not necessary. Some of the PEcAn model interface packages push this definition of “not necessarily needed” by declaring their coupled model package in Suggests rather than Imports. For example, the PEcAn.BIOCRO package cannot do anything useful when the BioCro model is not installed, but it lists BioCro in Suggests because PEcAn as a whole can work without it. This is a compromise to simplify installation of PEcAn for users who only plan to use a few models, so that they can avoid the bother of installing BioCro if they only plan to run, say, SIPNET. Since the point of Suggests is that they are allowed to be missing, all code that uses a suggested package must behave reasonably when the package is not found. Depending on the situation, “reasonably” could mean checking whether the package is available and throwing an error as needed (PEcAn.BIOCRO uses its .onLoad function to check at load time whether BioCro is installed and will refuse to load if it is not), or providing an alternative behavior (PEcAn.data.atmosphere::get_NARR_thredds checks at call time for either parallel or doParallel and uses whichever one it finds first), or something else, but your code should never just assume that the suggested package is available. You are not allowed to import functions from Suggests into your package’s namespace, so always call them in ::-qualified form. By default R will not install suggested packages when your package is installed, but users can change this using the dependencies argument of install.packages. Note that for testing on Travis CI, PEcAn does install all Suggests (because they are required for full package checks), so any of your code that runs when a suggested package is not available will never be exercised by Travis checks. It is often tempting to move a dependency from Imports to Suggests because it is a hassle to install (large, hard to compile, no longer available from CRAN, currently broken on GitHub, etc), in the hopes that this will isolate the rest of PEcAn from the troublesome dependency. This helps for some cases, but fails for two very common ones: It does not reduce install time for CI builds, because all suggested packages need to be present when running full package checks (R CMD check or devtools::check or make check). It also does not prevent breakage when updating PEcAn via make install, because devtools::install_deps does not install suggested packages that are missing but does try to upgrade any that are already installed to the newest available version – even if the installed version took ages to compile and would have worked just fine! "],["importing-functions-use-roxygen.html", "31.4 Importing Functions: Use Roxygen", " 31.4 Importing Functions: Use Roxygen PEcAn style is to import very few functions and instead use fully namespaced function calls (pkg::fn()) everywhere it’s practical to do so. In cases where double-colon notation would be especially burdensome, such as when importing custom binary operators like %>%, it’s acceptable to import specific functions into the package namespace. Do this by using Roxygen comments of the form #' @importFrom pkg function, not by hand-editing the NAMESPACE file. If the import is only used in one or a few functions, use an @importFrom in the documentation for each function that uses it. If it is used all over the package, use a single @importFrom in the Roxygen documentation block for the whole package, which probably lives in a file called either zzz.R or <your_package_name>-package.R: #' What your package does #' #' Longer description of the package goes here. #' Probably with links to other resources about it, citations, etc. #' #' @docType package #' @name PEcAn.yourpkg #' @importFrom magrittr %>% NULL Roxygen will make sure there’s only one NAMESPACE entry per imported function no matter how many importFrom statements there are, but please pick a scheme (either import on every usage or once for the whole package), stick with it, and do not make function x() rely on an importFrom in the comments above function y(). Please do not import entire package namespaces (#' @import pkg); it increases the chance of function name collisions and makes it much harder to understand which package a given function was called from. A special note about importing functions from the tidyverse: Be sure to import from the package(s) that actually contain the functions you want to use, e.g. Imports: dplyr, magrittr, purrr / @importFrom magrittr %>% / purrr::map(...), not Imports: tidyverse / @importFrom tidyverse %>% / tidyverse::map(...). The package named tidyverse is just a interactive shortcut that loads the whole collection of constituent packages; it doesn’t export any functions in its own namespace and therefore importing it into your package doesn’t make them available. "],["loading-code-dont-but-use-requirenamespace-when-you-do.html", "31.5 Loading Code: Don’t… But Use requireNamespace When You Do", " 31.5 Loading Code: Don’t… But Use requireNamespace When You Do The very short version of this section: We want to maintain clear separation between the package’s namespace (which we control and want to keep predictable) and the global namespace (which the user controls, might change in ways we have no control over, and will be justifiably angry if we change it in ways they were not expecting). Therefore, avoid attaching packages to the search path (so no Depends and no library() or require() inside functions), and do not explicitly load other namespaces if you can help it. The longer version requires that we make a distinction often glossed over: Loading a package makes it possible for R to find things in the package namespace and does any actions needed to make it ready for use (e.g. running its .onLoad method, loading DLLs if the package contains compiled code, etc). Attaching a package (usually by calling library(\"somePackage\")) loads it if it wasn’t already loaded, and then adds it to the search path so that the user can find things in its namespace. As discussed in the “Declaring Dependancies” section above, dependencies listed in Depends will be attached when your package is attached, but they will be neither attached nor loaded when your package is loaded without being attached. Loading a dependency into the package namespace is undesirable because it makes it hard to understand our own code – if we need to use something from elsewhere, we’d prefer call it from its own namespace using :: (which implicitly loads the dependency!) or explicitly import it with a Roxygen @import directive. But in a few cases this isn’t enough. The most common reason to need to explicitly load a dependency is that some packages define new S3 methods for generic functions defined in other packages, but do not export these methods directly. We would prefer that these packages did not do this, but sometimes we have to use them anyway. An example from PEcAn is that PEcAn.MA needs to call as.matrix on objects of class mcmc.list. When the coda namespace is loaded, as.matrix(some_mcmc.list) can be correctly dispatched by base::as.matrix to the unexported method coda:::as.matrix.mcmc.list, but when coda is not loaded this dispatch will fail. Unfortunately coda does not export as.matrix.mcmc.list so we cannot call it directly or import it into the PEcAn.MA namespace, so instead we load the coda namespace whenever PEcAn.MA is loaded. Attaching packages to the user’s search path is even more problematic because it makes it hard for the user to understand how your package will affect their own code. Packages attached by a function stay attached after the function exits, so they can cause name collisions far downstream of the function call, potentially in code that has nothing to do with your package. And worse, since names are looked up in reverse order of package loading, it can cause behavior that differs in strange ways depending on the order of lines that look independent of each other: library(Hmisc) x = ... y = 3 summarize(x) # calls Hmisc::summarize y2 <- some_package_that_attaches_dplyr::innocent.looking.function(y) # Loading required package: dplyr summarize(x) # Looks identical to previous summarize, but calls dplyr::summarize! This is not to say that users will never want your package to attach another one for them, just that it’s rare and that attaching dependencies is much more likely to cause bugs than to fix them and additionally doesn’t usually save the package author any work. One possible exception to the do-not-attach-packages rule is a case where your dependency ignores all good practice and wrongly assumes, without checking, that all of its own dependencies are attached; if its DESCRIPTION uses only Depends instead of Imports, this is often a warning sign. For example, a small-but-surprising number of packages depend on the methods package without proper checks (this is probably because most but not all R interpreters attach methods by default and therefore it’s easy for an author to forget it might ever be otherwise unless they happen to test with a but-not-all interpreter). If you find yourself with a dependency that does this, accept first that you are relying on a package that is broken, and you should either convince its maintainer to fix it or find a way to remove the dependency from PEcAn. But as a short-term workaround, it is sometimes possible for your code to attach the direct dependency so that it will behave right with regard to its secondary dependencies. If so, make sure the attachment happens every time your package is loaded (e.g. by calling library(depname) inside your package’s .onLoad method) and not just when your package is attached (e.g. by putting it in Depends). When you do need to load or attach a dependency, it is probably better to do it inside your package’s .onLoad method rather than in individual functions, but this isn’t ironclad. To only load, use requireNamespace(pkgname, quietly=TRUE) – this will make it available inside your package’s namespace while avoiding (most) annoying loadtime messages and not disturbing the user’s search path. To attach when you really can’t avoid it, declare the dependency in Depends and also attach it using library(pkgname) in your .onLoad method. Note that scripts in inst/ are considered to be sample code rather than part of the package namespace, so it is acceptable for them to explicitly attach packages using library(). You may also see code that uses require(pkgname); this is just like library, but returns FALSE instead of erroring if package load fails. It is OK for scripts in inst/ that can do and do do something useful when a dependency is missing, but if it is used as if(!require(pkg)){ stop(...)} then replace it with library(pkg). If you think your package needs to load or attach code for any reason, please note why in your pull request description and be prepared for questions about it during code review. If your reviewers can think of an alternate approach that avoids loading or attaching, they will likely ask you to use it even if it creates extra work for you. "],["installing-dependencies-let-the-machines-do-it.html", "31.6 Installing dependencies: Let the machines do it", " 31.6 Installing dependencies: Let the machines do it In most cases you won’t need to think about how dependencies get installed – just declare them in your package’s DESCRIPTION and the installation will be handled automatically by R and devtools during the build process. The main exception is when a dependency relies on non-R software that R does not know how to install automatically. For example, rjags relies on JAGS, which might be installed in a different place on every machine. If your dependency falls in this category, you will know (because your CI builds will keep failing until you figure out how to fix it), but the exact details of the fix will differ for every case. "],["appendix-testthat.html", "32 Testing with the testthat package", " 32 Testing with the testthat package Tests are found in <package>/tests/testthat/ (for example, base/utils/inst/tests/) See attached file and http://r-pkgs.had.co.nz/tests.html for details on how to use the testthat package. "],["list-of-expectations.html", "32.1 List of Expectations", " 32.1 List of Expectations Full Abbreviation expect_that(x, is_true()) expect_true(x) expect_that(x, is_false()) expect_false(x) expect_that(x, is_a(y)) expect_is(x, y) expect_that(x, equals(y)) expect_equal(x, y) expect_that(x, is_equivalent_to(y)) expect_equivalent(x, y) expect_that(x, is_identical_to(y)) expect_identical(x, y) expect_that(x, matches(y)) expect_matches(x, y) expect_that(x, prints_text(y)) expect_output(x, y) expect_that(x, shows_message(y)) expect_message(x, y) expect_that(x, gives_warning(y)) expect_warning(x, y) expect_that(x, throws_error(y)) expect_error(x, y) "],["basic-use-of-the-testthat-package.html", "32.2 Basic use of the testthat package", " 32.2 Basic use of the testthat package Create a file called <packagename>/tests/testthat.R with the following contents: library(testthat) library(mypackage) test_check("mypackage") Tests should be placed in <packagename>/tests/testthat/test-<sourcefilename>.R, and look like the following: test_that("mathematical operators plus and minus work as expected",{ expect_equal(sum(1,1), 2) expect_equal(sum(-1,-1), -2) expect_equal(sum(1,NA), NA) expect_error(sum("cat")) set.seed(0) expect_equal(sum(matrix(1:100)), sum(data.frame(1:100))) }) test_that("different testing functions work, giving excuse to demonstrate",{ expect_identical(1, 1) expect_identical(numeric(1), integer(1)) expect_equivalent(numeric(1), integer(1)) expect_warning(mean('1')) expect_that(mean('1'), gives_warning("argument is not numeric or logical: returning NA")) expect_warning(mean('1'), "argument is not numeric or logical: returning NA") expect_message(message("a"), "a") }) "],["data-for-tests.html", "32.3 Data for tests", " 32.3 Data for tests Many of PEcAn’s functions require inputs that are provided as data. These can be in the data or the inst/extdata folders of a package. Data that are not package specific should be placed in the PEcAn.all (base/all) or PEcAn.utils (base/utils) packages. Some useful conventions: "],["settings-1.html", "32.4 Settings", " 32.4 Settings A generic settings can be found in the PEcAn.all package settings.xml <- system.file("pecan.biocro.xml", package = "PEcAn.BIOCRO") settings <- read.settings(settings.xml) database settings can be specified, and tests run only if a connection is available We currently use the following database to run tests against; tests that require access to a database should check db.exists() and be skipped if it returns FALSE to avoid failed tests on systems that do not have the database installed. settings$database <- list(userid = "bety", passwd = "bety", name = "bety", # database name host = "localhost" # server name) test_that(..., { skip_if_not(db.exists(settings$database)) ## write tests here }) instructions for installing this are available on the VM creation wiki examples can be found in the PEcAn.DB package (base/db/tests/testthat/). Model specific settings can go in the model-specific module, for example: settings.xml <- system.file("extdata/pecan.biocro.xml", package = "PEcAn.BIOCRO") settings <- read.settings(settings.xml) test-specific settings: settings text can be specified inline: settings.text <- " <pecan> <nocheck>nope</nocheck> ## allows bypass of checks in the read.settings functions <pfts> <pft> <name>ebifarm.pavi</name> <outdir>test/</outdir> </pft> </pfts> <outdir>test/</outdir> <database> <userid>bety</userid> <passwd>bety</passwd> <location>localhost</location> <name>bety</name> </database> </pecan>" settings <- read.settings(settings.text) values in settings can be updated: settings <- read.settings(settings.text) settings$outdir <- "/tmp" ## or any other settings "],["helper-functions-for-unit-tests.html", "32.5 Helper functions for unit tests", " 32.5 Helper functions for unit tests PEcAn.utils::tryl returns FALSE if function gives error PEcAn.utils::temp.settings creates temporary settings file PEcAn.DB::db.exists returns TRUE if connection to database is available "],["developer-devtools.html", "33 devtools package", " 33 devtools package Provides functions to simplify development Documentation: The R devtools package load_all("pkg") document("pkg") test("pkg") install("pkg") build("pkg") other tips for devtools (from the documentation): Adding the following to your ~/.Rprofile will load devtools when running R in interactive mode: # load devtools by default if (interactive()) { suppressMessages(require(devtools)) } Adding the following to your .Rpackages will allow devtools to recognize package by folder name, rather than directory path # in this example, devhome is the pecan trunk directory devhome <- "/home/dlebauer/R-dev/pecandev/" list( default = function(x) { file.path(devhome, x, x) }, "utils" = paste(devhome, "pecandev/utils", sep = "") "common" = paste(devhome, "pecandev/common", sep = "") "all" = paste(devhome, "pecandev/all", sep = "") "ed" = paste(devhome, "pecandev/models/ed", sep = "") "uncertainty" = paste(devhome, "modules/uncertainty", sep = "") "meta.analysis" = paste(devhome, "modules/meta.analysis", sep = "") "db" = paste(devhome, "db", sep = "") ) Now, devtools can take pkg as an argument instead of /path/to/pkg/, e.g. so you can use build(\"pkg\") instead of build(\"/path/to/pkg/\") "],["models_singularity.html", "34 singularity", " 34 singularity Running a model using singularity. This is work in progress. This assumes you have singulariy already installed. This will work on a Linux machine (x86_64). First make sure you have all the data files: bash script to install required files (click to expand) #!/bin/bash if [ ! -e sites ]; then curl -s -o sites.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/sites.tgz tar zxf sites.tgz sed -i -e "s#/home/kooper/Projects/EBI#/data/sites#" sites/*/ED_MET_DRIVER_HEADER rm sites.tgz fi if [ ! -e inputs ]; then curl -s -o inputs.tgz http://isda.ncsa.illinois.edu/~kooper/EBI/inputs.tgz tar zxf inputs.tgz rm inputs.tgz fi if [ ! -e testrun.s83 ]; then curl -s -o testrun.s83.zip http://isda.ncsa.illinois.edu/~kooper/EBI/testrun.s83.zip unzip -q testrun.s83.zip sed -i -e "s#/home/pecan#/data#" testrun.s83/ED2IN rm testrun.s83.zip fi if [ ! -e ${HOME}/sites/Santarem_Km83 ]; then curl -s -o Santarem_Km83.zip http://isda.ncsa.illinois.edu/~kooper/EBI/Santarem_Km83.zip unzip -q -d sites Santarem_Km83.zip sed -i -e "s#/home/pecan#/data#" sites/Santarem_Km83/ED_MET_DRIVER_HEADER rm Santarem_Km83.zip fi Next edit the ED2IN file in testrun.s83 ED2IN file (click to expand) !==========================================================================================! !==========================================================================================! ! ED2IN . ! ! ! ! This is the file that contains the variables that define how ED is to be run. There ! ! is some brief information about the variables here. ! !------------------------------------------------------------------------------------------! $ED_NL !----- Simulation title (64 characters). -----------------------------------------------! NL%EXPNME = 'ED version 2.1 test' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Type of run: ! ! INITIAL -- Starts a new run, that can be based on a previous run (restart/history), ! ! but then it will use only the biomass and soil carbon information. ! ! HISTORY -- Resumes a simulation from the last history. This is different from ! ! initial in the sense that exactly the same information written in the ! ! history will be used here. ! !---------------------------------------------------------------------------------------! NL%RUNTYPE = 'INITIAL' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Start of simulation. Information must be given in UTC time. ! !---------------------------------------------------------------------------------------! NL%IMONTHA = 01 NL%IDATEA = 01 NL%IYEARA = 2001 NL%ITIMEA = 0000 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! End of simulation. Information must be given in UTC time. ! !---------------------------------------------------------------------------------------! NL%IMONTHZ = 01 NL%IDATEZ = 01 NL%IYEARZ = 2002 NL%ITIMEZ = 0000 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! DTLSM -- Time step to integrate photosynthesis, and the maximum time step for ! ! integration of energy and water budgets (units: seconds). Notice that the ! ! model will take steps shorter than this if this is too coarse and could ! ! lead to loss of accuracy or unrealistic results in the biophysics. ! ! Recommended values are < 60 seconds if INTEGRATION_SCHEME is 0, and 240-900 ! ! seconds otherwise. ! ! RADFRQ -- Time step to integrate radiation, in seconds. This must be an integer ! ! multiple of DTLSM, and we recommend it to be exactly the same as DTLSM. ! !---------------------------------------------------------------------------------------! NL%DTLSM = 600. NL%RADFRQ = 600. !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are used in case the user wants to run a regional run. ! ! ! ! N_ED_REGION -- number of regions for which you want to run ED. This can be set to ! ! zero provided that N_POI is not... ! ! GRID_TYPE -- which kind of grid to run: ! ! 0. Longitude/latitude grid ! ! 1. Polar-stereographic ! !---------------------------------------------------------------------------------------! NL%N_ED_REGION = 0 NL%GRID_TYPE = 1 !------------------------------------------------------------------------------------! ! The following variables are used only when GRID_TYPE is set to 0. You must ! ! provide one value for each grid, except otherwise noted. ! ! ! ! GRID_RES -- Grid resolution, in degrees (first grid only, the other grids ! ! resolution will be defined by NSTRATX/NSTRATY). ! ! ED_REG_LATMIN -- Southernmost point of each region. ! ! ED_REG_LATMAX -- Northernmost point of each region. ! ! ED_REG_LONMIN -- Westernmost point of each region. ! ! ED_REG_LONMAX -- Easternmost point of each region. ! !------------------------------------------------------------------------------------! NL%GRID_RES = 1.0 NL%ED_REG_LATMIN = -12.0, -7.5, 10.0, -6.0 NL%ED_REG_LATMAX = 1.0, -3.5, 15.0, -1.0 NL%ED_REG_LONMIN = -66.0,-58.5, 70.0, -63.0 NL%ED_REG_LONMAX = -49.0,-54.5, 35.0, -53.0 !------------------------------------------------------------------------------------! !------------------------------------------------------------------------------------! ! The following variables are used only when GRID_TYPE is set to 1. ! ! ! ! NNXP -- number of points in the X direction. One value for each grid. ! ! NNYP -- number of points in the Y direction. One value for each grid. ! ! DELTAX -- grid resolution in the X direction, near the grid pole. Units: [ m]. ! ! this value is used to define the first grid only, other grids are ! ! defined using NNSTRATX. ! ! DELTAY -- grid resolution in the Y direction, near the grid pole. Units: [ m]. ! ! this value is used to define the first grid only, other grids are ! ! defined using NNSTRATX. Unless you are running some specific tests, ! ! both DELTAX and DELTAY should be the same. ! ! POLELAT -- Latitude of the pole point. Set this close to CENTLAT for a more ! ! traditional "square" domain. One value for all grids. ! ! POLELON -- Longitude of the pole point. Set this close to CENTLON for a more ! ! traditional "square" domain. One value for all grids. ! ! CENTLAT -- Latitude of the central point. One value for each grid. ! ! CENTLON -- Longitude of the central point. One value for each grid. ! !------------------------------------------------------------------------------------! NL%NNXP = 110 NL%NNYP = 70 NL%DELTAX = 60000. NL%DELTAY = 60000. NL%POLELAT = -2.609075 NL%POLELON = -60.2093 NL%CENTLAT = -2.609075 NL%CENTLON = -60.2093 !------------------------------------------------------------------------------------! !------------------------------------------------------------------------------------! ! Nest ratios. These values are used by both GRID_TYPE=0 and GRID_TYPE=1. ! ! NSTRATX -- this is will divide the values given by DELTAX or GRID_RES for the ! ! nested grids. The first value should be always one. ! ! NSTRATY -- this is will divide the values given by DELTAY or GRID_RES for the ! ! nested grids. The first value should be always one, and this must ! ! be always the same as NSTRATX when GRID_TYPE = 0, and this is also ! ! strongly recommended for when GRID_TYPE = 1. ! !------------------------------------------------------------------------------------! NL%NSTRATX = 1,4 NL%NSTRATY = 1,4 !------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are used to define single polygon of interest runs, and ! ! they are ignored when N_ED_REGION = 0. ! ! ! ! N_POI -- number of polygons of interest (POIs). This can be zero as long as ! ! N_ED_REGION is not. ! ! POI_LAT -- list of latitudes of each POI. ! ! POI_LON -- list of longitudes of each POI. ! ! POI_RES -- grid resolution of each POI (degrees). This is used only to define the ! ! soil types ! !---------------------------------------------------------------------------------------! NL%N_POI = 1 NL%POI_LAT = -3.018 NL%POI_LON = -54.971 NL%POI_RES = 1.00 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! LOADMETH -- Load balancing method. This is used only in regional runs run in ! ! parallel. ! ! 0. Let ED decide the best way of splitting the polygons. Commonest ! ! option and default. ! ! 1. One of the methods to split polygons based on their previous ! ! work load. Developpers only. ! ! 2. Try to load an equal number of SITES per node. Useful for when ! ! total number of polygon is the same as the total number of cores. ! ! 3. Another method to split polygons based on their previous work load. ! ! Developpers only. ! !---------------------------------------------------------------------------------------! NL%LOADMETH = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ED2 File output. For all the variables 0 means no output and 3 means HDF5 output. ! ! ! ! IFOUTPUT -- Fast analysis. These are mostly polygon-level averages, and the time ! ! interval between files is determined by FRQANL ! ! IDOUTPUT -- Daily means (one file per day) ! ! IMOUTPUT -- Monthly means (one file per month) ! ! IQOUTPUT -- Monthly means of the diurnal cycle (one file per month). The number ! ! of points for the diurnal cycle is 86400 / FRQANL ! ! IYOUTPUT -- Annual output. ! ! ITOUTPUT -- Instantaneous fluxes, mostly polygon-level variables, one file per year. ! ! ISOUTPUT -- restart file, for HISTORY runs. The time interval between files is ! ! determined by FRQHIS ! !---------------------------------------------------------------------------------------! NL%IFOUTPUT = 0 NL%IDOUTPUT = 0 NL%IMOUTPUT = 0 NL%IQOUTPUT = 3 NL%IYOUTPUT = 0 NL%ITOUTPUT = 0 NL%ISOUTPUT = 3 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ATTACH_METADATA -- Flag for attaching metadata to HDF datasets. Attaching metadata ! ! will aid new users in quickly identifying dataset descriptions but ! ! will compromise I/O performance significantly. ! ! 0 = no metadata, 1 = attach metadata ! !---------------------------------------------------------------------------------------! NL%ATTACH_METADATA = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! UNITFAST -- The following variables control the units for FRQFAST/OUTFAST, and ! ! UNITSTATE FRQSTATE/OUTSTATE, respectively. Possible values are: ! ! 0. Seconds; ! ! 1. Days; ! ! 2. Calendar months (variable) ! ! 3. Calendar years (variable) ! ! ! ! N.B.: 1. In case OUTFAST/OUTSTATE are set to special flags (-1 or -2) ! ! UNITFAST/UNITSTATE will be ignored for them. ! ! 2. In case IQOUTPUT is set to 3, then UNITFAST has to be 0. ! ! ! !---------------------------------------------------------------------------------------! NL%UNITFAST = 0 NL%UNITSTATE = 3 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! OUTFAST/OUTSTATE -- these control the number of times per file. ! ! 0. Each time gets its own file ! ! -1. One file per day ! ! -2. One file per month ! ! > 0. Multiple timepoints can be recorded to a single file reducing ! ! the number of files and i/o time in post-processing. ! ! Multiple timepoints should not be used in the history files ! ! if you intend to use these for HISTORY runs. ! !---------------------------------------------------------------------------------------! NL%OUTFAST = 1. NL%OUTSTATE = 1. !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ICLOBBER -- What to do in case the model finds a file that it was supposed the ! ! written? 0 = stop the run, 1 = overwrite without warning. ! ! FRQFAST -- time interval between analysis files, units defined by UNITFAST. ! ! FRQSTATE -- time interval between history files, units defined by UNITSTATE. ! !---------------------------------------------------------------------------------------! NL%ICLOBBER = 1 NL%FRQFAST = 3600. NL%FRQSTATE = 1. !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! FFILOUT -- Path and prefix for analysis files (all but history/restart). ! ! SFILOUT -- Path and prefix for history files. ! !---------------------------------------------------------------------------------------! NL%FFILOUT = '/data/testrun.s83/analy/ts83' NL%SFILOUT = '/data/testrun.s83/histo/ts83' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IED_INIT_MODE -- This controls how the plant community and soil carbon pools are ! ! initialised. ! ! ! ! -1. Start from a true bare ground run, or an absolute desert run. This will ! ! never grow any plant. ! ! 0. Start from near-bare ground (only a few seedlings from each PFT to be included ! ! in this run). ! ! 1. This will use history files written by ED-1.0. It will grab the ecosystem ! ! state (like biomass, LAI, plant density, etc.), but it will start the ! ! thermodynamic state as a new simulation. ! ! 2. Same as 1, but it uses history files from ED-2.0 without multiple sites, and ! ! with the old PFT numbers. ! ! 3. Same as 1, but using history files from ED-2.0 with multiple sites and ! ! TOPMODEL hydrology. ! ! 4. Same as 1, but using ED2.1 H5 history/state files that take the form: ! ! 'dir/prefix-gxx.h5' ! ! Initialization files MUST end with -gxx.h5 where xx is a two digit integer ! ! grid number. Each grid has its own initialization file. As an example, if a ! ! user has two files to initialize their grids with: ! ! example_file_init-g01.h5 and example_file_init-g02.h5 ! ! NL%SFILIN = 'example_file_init' ! ! ! ! 5. This is similar to option 4, except that you may provide several files ! ! (including a mix of regional and POI runs, each file ending at a different ! ! date). This will not check date nor grid structure, it will simply read all ! ! polygons and match the nearest neighbour to each polygon of your run. SFILIN ! ! must have the directory common to all history files that are sought to be used,! ! up to the last character the files have in common. For example if your files ! ! are ! ! /mypath/P0001-S-2000-01-01-000000-g01.h5, ! ! /mypath/P0002-S-1966-01-01-000000-g02.h5, ! ! ... ! ! /mypath/P1000-S-1687-01-01-000000-g01.h5: ! ! NL%SFILIN = '/mypath/P' ! ! ! ! 6 - Initialize with ED-2 style files without multiple sites, exactly like option ! ! 2, except that the PFT types are preserved. ! !---------------------------------------------------------------------------------------! NL%IED_INIT_MODE = 6 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! EDRES -- Expected input resolution for ED2.0 files. This is not used unless ! ! IED_INIT_MODE = 3. ! !---------------------------------------------------------------------------------------! NL%EDRES = 1.0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! SFILIN -- The meaning and the size of this variable depends on the type of run, set ! ! at variable NL%RUNTYPE. ! ! ! ! 1. INITIAL. Then this is the path+prefix of the previous ecosystem state. This has ! ! dimension of the number of grids so you can initialize each grid with a ! ! different dataset. In case only one path+prefix is given, the same will ! ! be used for every grid. Only some ecosystem variables will be set up ! ! here, and the initial condition will be in thermodynamic equilibrium. ! ! ! ! 2. HISTORY. This is the path+prefix of the history file that will be used. Only the ! ! path+prefix will be used, as the history for every grid must have come ! ! from the same simulation. ! !---------------------------------------------------------------------------------------! NL%SFILIN = '/data/sites/Santarem_Km83/s83_default.' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! History file information. These variables are used to continue a simulation from ! ! a point other than the beginning. Time must be in UTC. ! ! ! ! IMONTHH -- the time of the history file. This is the only place you need to change ! ! IDATEH dates for a HISTORY run. You may change IMONTHZ and related in case you ! ! IYEARH want to extend the run, but yo should NOT change IMONTHA and related. ! ! ITIMEH ! !---------------------------------------------------------------------------------------! NL%ITIMEH = 0000 NL%IDATEH = 01 NL%IMONTHH = 05 NL%IYEARH = 2001 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! NZG - number of soil layers. One value for all grids. ! ! NZS - maximum number of snow/water pounding layers. This is used only for ! ! snow, if only liquid water is standing, the water will be all collapsed ! ! into a single layer, so if you are running for places where it doesn't snow ! ! a lot, leave this set to 1. One value for all grids. ! !---------------------------------------------------------------------------------------! NL%NZG = 16 NL%NZS = 4 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISOILFLG -- this controls which soil type input you want to use. ! ! 1. Read in from a dataset I will provide in the SOIL_DATABASE variable a ! ! few lines below. ! ! below. ! ! 2. No data available, I will use constant values I will provide in ! ! NSLCON or by prescribing the fraction of sand and clay (see SLXSAND ! ! and SLXCLAY). ! !---------------------------------------------------------------------------------------! NL%ISOILFLG = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! NSLCON -- ED-2 Soil classes that the model will use when ISOILFLG is set to 2. ! ! Possible values are: ! !---------------------------------------------------------------------------------------! ! 1 -- sand | 7 -- silty clay loam | 13 -- bedrock ! ! 2 -- loamy sand | 8 -- clayey loam | 14 -- silt ! ! 3 -- sandy loam | 9 -- sandy clay | 15 -- heavy clay ! ! 4 -- silt loam | 10 -- silty clay | 16 -- clayey sand ! ! 5 -- loam | 11 -- clay | 17 -- clayey silt ! ! 6 -- sandy clay loam | 12 -- peat ! !---------------------------------------------------------------------------------------! NL%NSLCON = 11 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISOILCOL -- LEAF-3 and ED-2 soil colour classes that the model will use when ISOILFLG ! ! is set to 2. Soil classes are from 1 to 20 (1 = lightest; 20 = darkest). ! ! The values are the same as CLM-4.0. The table is the albedo for visible ! ! and near infra-red. ! !---------------------------------------------------------------------------------------! ! ! ! |-----------------------------------------------------------------------| ! ! | | Dry soil | Saturated | | Dry soil | Saturated | ! ! | Class |-------------+-------------| Class +-------------+-------------| ! ! | | VIS | NIR | VIS | NIR | | VIS | NIR | VIS | NIR | ! ! |-------+------+------+------+------+-------+------+------+------+------| ! ! | 1 | 0.36 | 0.61 | 0.25 | 0.50 | 11 | 0.24 | 0.37 | 0.13 | 0.26 | ! ! | 2 | 0.34 | 0.57 | 0.23 | 0.46 | 12 | 0.23 | 0.35 | 0.12 | 0.24 | ! ! | 3 | 0.32 | 0.53 | 0.21 | 0.42 | 13 | 0.22 | 0.33 | 0.11 | 0.22 | ! ! | 4 | 0.31 | 0.51 | 0.20 | 0.40 | 14 | 0.20 | 0.31 | 0.10 | 0.20 | ! ! | 5 | 0.30 | 0.49 | 0.19 | 0.38 | 15 | 0.18 | 0.29 | 0.09 | 0.18 | ! ! | 6 | 0.29 | 0.48 | 0.18 | 0.36 | 16 | 0.16 | 0.27 | 0.08 | 0.16 | ! ! | 7 | 0.28 | 0.45 | 0.17 | 0.34 | 17 | 0.14 | 0.25 | 0.07 | 0.14 | ! ! | 8 | 0.27 | 0.43 | 0.16 | 0.32 | 18 | 0.12 | 0.23 | 0.06 | 0.12 | ! ! | 9 | 0.26 | 0.41 | 0.15 | 0.30 | 19 | 0.10 | 0.21 | 0.05 | 0.10 | ! ! | 10 | 0.25 | 0.39 | 0.14 | 0.28 | 20 | 0.08 | 0.16 | 0.04 | 0.08 | ! ! |-----------------------------------------------------------------------| ! ! ! ! Soil type 21 is a special case in which we use the albedo method that used to be ! ! the default in ED-2.1. ! !---------------------------------------------------------------------------------------! NL%ISOILCOL = 21 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! These variables are used to define the soil properties when you don't want to use ! ! the standard soil classes. ! ! ! ! SLXCLAY -- Prescribed fraction of clay [0-1] ! ! SLXSAND -- Prescribed fraction of sand [0-1]. ! ! ! ! They are used only when ISOILFLG is 2, both values are between 0. and 1., and ! ! theira sum doesn't exceed 1. Otherwise standard ED values will be used instead. ! !---------------------------------------------------------------------------------------! NL%SLXCLAY = 0.59 NL%SLXSAND = 0.39 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Soil grid and initial conditions if no file is provided: ! ! ! ! SLZ - soil depth in m. Values must be negative and go from the deepest layer to ! ! the top. ! ! SLMSTR - this is the initial soil moisture, now given as the soil moisture index. ! ! Values can be fraction, in which case they will be linearly interpolated ! ! between the special points (e.g. 0.5 will put soil moisture half way ! ! between the wilting point and field capacity). ! ! -1 = dry air soil moisture ! ! 0 = wilting point ! ! 1 = field capacity ! ! 2 = porosity (saturation) ! ! STGOFF - initial temperature offset (soil temperature = air temperature + offset) ! !---------------------------------------------------------------------------------------! NL%SLZ = -8.000, -6.959, -5.995, -5.108, -4.296, -3.560, -2.897, -2.307, -1.789, -1.340, -0.961, -0.648, -0.400, -0.215, -0.089, -0.020 NL%SLMSTR = 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00 NL%STGOFF = 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Input databases ! ! VEG_DATABASE -- vegetation database, used only to determine the land/water mask. ! ! Fill with the path and the prefix. ! ! SOIL_DATABASE -- soil database, used to determine the soil type. Fill with the ! ! path and the prefix. ! ! LU_DATABASE -- land-use change disturbance rates database, used only when ! ! IANTH_DISTURB is set to 1. Fill with the path and the prefix. ! ! PLANTATION_FILE -- plantation fraction file. In case you don't have such a file or ! ! you do not want to use it, you must leave this variable empty: ! ! (NL%PLANTATION_FILE = '' ! ! THSUMS_DATABASE -- input directory with dataset to initialise chilling degrees and ! ! growing degree days, which is used to drive the cold-deciduous ! ! phenology (you must always provide this, even when your PFTs are ! ! not cold deciduous). ! ! ED_MET_DRIVER_DB -- File containing information for meteorological driver ! ! instructions (the "header" file). ! ! SOILSTATE_DB -- Dataset in case you want to provide the initial conditions of ! ! soil temperature and moisture. ! ! SOILDEPTH_DB -- Dataset in case you want to read in soil depth information. ! !---------------------------------------------------------------------------------------! NL%VEG_DATABASE = '/data/oge2OLD/OGE2_' NL%SOIL_DATABASE = '/data/faoOLD/FAO_' NL%LU_DATABASE = '/data/ed_inputs/glu/' NL%PLANTATION_FILE = '' NL%THSUMS_DATABASE = '/data/ed_inputs/' NL%ED_MET_DRIVER_DB = '/data/sites/Santarem_Km83/ED_MET_DRIVER_HEADER' NL%SOILSTATE_DB = '' NL%SOILDEPTH_DB = '' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISOILSTATEINIT -- Variable controlling how to initialise the soil temperature and ! ! moisture ! ! 0. Use SLMSTR and STGOFF. ! ! 1. Read from SOILSTATE_DB. ! ! ISOILDEPTHFLG -- Variable controlling how to initialise soil depth ! ! 0. Constant, always defined by the first SLZ layer. ! ! 1. Read from SOILDEPTH_DB. ! !---------------------------------------------------------------------------------------! NL%ISOILSTATEINIT = 0 NL%ISOILDEPTHFLG = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISOILBC -- This controls the soil moisture boundary condition at the bottom. If ! ! unsure, use 0 for short-term simulations (couple of days), and 1 for long- ! ! -term simulations (months to years). ! ! 0. Bedrock. Flux from the bottom of the bottommost layer is set to 0. ! ! 1. Gravitational flow. The flux from the bottom of the bottommost layer ! ! is due to gradient of height only. ! ! 2. Super drainage. Soil moisture of the ficticious layer beneath the ! ! bottom is always at dry air soil moisture. ! ! 3. Half-way. Assume that the fictious layer beneath the bottom is always ! ! at field capacity. ! ! 4. Aquifer. Soil moisture of the ficticious layer beneath the bottom is ! ! always at saturation. ! !---------------------------------------------------------------------------------------! NL%ISOILBC = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IVEGT_DYNAMICS -- The vegetation dynamics scheme. ! ! 0. No vegetation dynamics, the initial state will be preserved, ! ! even though the model will compute the potential values. This ! ! option is useful for theoretical simulations only. ! ! 1. Normal ED vegetation dynamics (Moorcroft et al 2001). ! ! The normal option for almost any simulation. ! !---------------------------------------------------------------------------------------! NL%IVEGT_DYNAMICS = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IBIGLEAF -- Do you want to run ED as a 'big leaf' model? ! ! 0. No, use the standard size- and age-structure (Moorcroft et al. 2001) ! ! This is the recommended method for most applications. ! ! 1. 'big leaf' ED: this will have no horizontal or vertical hetero- ! ! geneities; 1 patch per PFT and 1 cohort per patch; no vertical ! ! growth, recruits will 'appear' instantaneously at maximum height. ! ! ! ! N.B. if you set IBIGLEAF to 1, you MUST turn off the crown model (CROWN_MOD = 0) ! !---------------------------------------------------------------------------------------! NL%IBIGLEAF = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! INTEGRATION_SCHEME -- The biophysics integration scheme. ! ! 0. Euler step. The fastest, but it doesn't estimate ! ! errors. ! ! 1. Fourth-order Runge-Kutta method. ED-2.1 default method ! ! 2. Heun's method (a second-order Runge-Kutta). ! ! 3. Hybrid Stepping (BDF2 implicit step for the canopy air and ! ! leaf temp, forward Euler for else, under development). ! !---------------------------------------------------------------------------------------! NL%INTEGRATION_SCHEME = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! RK4_TOLERANCE -- This is the relative tolerance for Runge-Kutta or Heun's ! ! integration. Larger numbers will make runs go faster, at the ! ! expense of being less accurate. Currently the valid range is ! ! between 1.e-7 and 1.e-1, but recommended values are between 1.e-4 ! ! and 1.e-2. ! !---------------------------------------------------------------------------------------! NL%RK4_TOLERANCE = 0.01 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IBRANCH_THERMO -- This determines whether branches should be included in the ! ! vegetation thermodynamics and radiation or not. ! ! 0. No branches in energy/radiation (ED-2.1 default); ! ! 1. Branches are accounted in the energy and radiation. Branchwood ! ! and leaf are treated separately in the canopy radiation scheme, ! ! but solved as a single pool in the biophysics integration. ! ! 2. Similar to 1, but branches are treated as separate pools in the ! ! biophysics (thus doubling the number of prognostic variables). ! !---------------------------------------------------------------------------------------! NL%IBRANCH_THERMO = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IPHYSIOL -- This variable will determine the functional form that will control how ! ! the various parameters will vary with temperature, and how the CO2 ! ! compensation point for gross photosynthesis (Gamma*) will be found. ! ! Options are: ! ! ! ! 0 -- Original ED-2.1, we use the "Arrhenius" function as in Foley et al. (1996) and ! ! Moorcroft et al. (2001). Gamma* is found using the parameters for tau as in ! ! Foley et al. (1996). ! ! 1 -- Modified ED-2.1. In this case Gamma* is found using the Michaelis-Mentel ! ! coefficients for CO2 and O2, as in Farquhar et al. (1980) and in CLM. ! ! 2 -- Collatz et al. (1991). We use the power (Q10) equations, with Collatz et al. ! ! parameters for compensation point, and the Michaelis-Mentel coefficients. The ! ! correction for high and low temperatures are the same as in Moorcroft et al. ! ! (2001). ! ! 3 -- Same as 2, except that we find Gamma* as in Farquhar et al. (1980) and in CLM. ! !---------------------------------------------------------------------------------------! NL%IPHYSIOL = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IALLOM -- Which allometry to use (this mostly affects tropical PFTs. Temperate PFTs ! ! will use the new root allometry and the maximum crown area if IALLOM is set ! ! to 1 or 2). ! ! 0. Original ED-2.1 ! ! 1. a. The coefficients for structural biomass are set so the total AGB ! ! is similar to Baker et al. (2004), equation 2. Balive is the ! ! default ED-2.1; ! ! b. Experimental root depth that makes canopy trees to have root depths ! ! of 5m and grasses/seedlings at 0.5 to have root depth of 0.5 m. ! ! c. Crown area defined as in Poorter et al. (2006), imposing maximum ! ! crown area ! ! 2. Similar to 1, but with a few extra changes. ! ! a. Height -> DBH allometry as in Poorter et al. (2006) ! ! b. Balive is retuned, using a few leaf biomass allometric equations for ! ! a few genuses in Costa Rica. References: ! ! Cole and Ewel (2006), and Calvo Alvarado et al. (2008). ! !---------------------------------------------------------------------------------------! NL%IALLOM = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IGRASS -- This controls the dynamics and growth calculation for grasses. A new ! ! grass scheme is now available where bdead = 0, height is a function of bleaf! ! and growth happens daily. ALS (3/3/12) ! ! 0: grasses behave like trees as in ED2.1 (old scheme) ! ! ! ! 1: new grass scheme as described above ! !---------------------------------------------------------------------------------------! NL%IGRASS = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IPHEN_SCHEME -- It controls the phenology scheme. Even within each scheme, the ! ! actual phenology will be different depending on the PFT. ! ! ! ! -1: grasses - evergreen; ! ! tropical - evergreen; ! ! conifers - evergreen; ! ! hardwoods - cold-deciduous (Botta et al.); ! ! ! ! 0: grasses - drought-deciduous (old scheme); ! ! tropical - drought-deciduous (old scheme); ! ! conifers - evergreen; ! ! hardwoods - cold-deciduous; ! ! ! ! 1: prescribed phenology ! ! ! ! 2: grasses - drought-deciduous (new scheme); ! ! tropical - drought-deciduous (new scheme); ! ! conifers - evergreen; ! ! hardwoods - cold-deciduous; ! ! ! ! 3: grasses - drought-deciduous (new scheme); ! ! tropical - drought-deciduous (light phenology); ! ! conifers - evergreen; ! ! hardwoods - cold-deciduous; ! ! ! ! Old scheme: plants shed their leaves once instantaneous amount of available water ! ! becomes less than a critical value. ! ! New scheme: plants shed their leaves once a 10-day running average of available ! ! water becomes less than a critical value. ! !---------------------------------------------------------------------------------------! NL%IPHEN_SCHEME = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Parameters that control the phenology response to radiation, used only when ! ! IPHEN_SCHEME = 3. ! ! ! ! RADINT -- Intercept ! ! RADSLP -- Slope. ! !---------------------------------------------------------------------------------------! NL%RADINT = -11.3868 NL%RADSLP = 0.0824 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! REPRO_SCHEME -- This controls plant reproduction and dispersal. ! ! 0. Reproduction off. Useful for very short runs only. ! ! 1. Original reproduction scheme. Seeds are exchanged between ! ! patches belonging to the same site, but they can't go outside ! ! their original site. ! ! 2. Similar to 1, but seeds are exchanged between patches belonging ! ! to the same polygon, even if they are in different sites. They ! ! can't go outside their original polygon, though. This is the ! ! same as option 1 if there is only one site per polygon. ! ! 3. Similar to 2, but recruits will only be formed if their phenology ! ! status would be "leaves fully flushed". This only matters for ! ! drought deciduous plants. This option is for testing purposes ! ! only, think 50 times before using it... ! !---------------------------------------------------------------------------------------! NL%REPRO_SCHEME = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! LAPSE_SCHEME -- This specifies the met lapse rate scheme: ! ! 0. No lapse rates ! ! 1. phenomenological, global ! ! 2. phenomenological, local (not yet implemented) ! ! 3. mechanistic(not yet implemented) ! !---------------------------------------------------------------------------------------! NL%LAPSE_SCHEME = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! CROWN_MOD -- Specifies how tree crowns are represent in the canopy radiation model, ! ! and in the turbulence scheme depending on ICANTURB. ! ! 0. ED1 default, crowns are evenly spread throughout the patch area, and ! ! cohorts are stacked on the top of each other. ! ! 1. Dietze (2008) model. Cohorts have a finite radius, and cohorts are ! ! stacked on the top of each other. ! !---------------------------------------------------------------------------------------! NL%CROWN_MOD = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the canopy radiation solver. ! ! ! ! ICANRAD -- Specifies how canopy radiation is solved. This variable sets both ! ! shortwave and longwave. ! ! 0. Two-stream model (Medvigy 2006), with the possibility to apply ! ! finite crown area to direct shortwave radiation. ! ! 1. Multiple-scattering model (Zhao and Qualls 2005,2006), with the ! ! possibility to apply finite crown area to all radiation fluxes. ! ! LTRANS_VIS -- Leaf transmittance for tropical plants - Visible/PAR ! ! LTRANS_NIR -- Leaf transmittance for tropical plants - Near Infrared ! ! LREFLECT_VIS -- Leaf reflectance for tropical plants - Visible/PAR ! ! LREFLECT_NIR -- Leaf reflectance for tropical plants - Near Infrared ! ! ORIENT_TREE -- Leaf orientation factor for tropical trees. Extremes are: ! ! -1. All leaves are oriented in the vertical ! ! 0. Leaf orientation is perfectly random ! ! 1. All leaves are oriented in the horizontal ! ! In practice, acceptable values range from -0.4 and 0.6 ! ! ORIENT_GRASS -- Leaf orientation factor for tropical grasses. Extremes are: ! ! -1. All leaves are oriented in the vertical ! ! 0. Leaf orientation is perfectly random ! ! 1. All leaves are oriented in the horizontal ! ! In practice, acceptable values range from -0.4 and 0.6 ! ! CLUMP_TREE -- Clumping factor for tropical trees. Extremes are: ! ! lim -> 0. Black hole (0 itself is unacceptable) ! ! 1. Homogeneously spread over the layer (i.e., no clumping) ! ! CLUMP_GRASS -- Clumping factor for tropical grasses. Extremes are: ! ! lim -> 0. Black hole (0 itself is unacceptable) ! ! 1. Homogeneously spread over the layer (i.e., no clumping) ! !---------------------------------------------------------------------------------------! NL%ICANRAD = 0 NL%LTRANS_VIS = 0.050 NL%LTRANS_NIR = 0.230 NL%LREFLECT_VIS = 0.100 NL%LREFLECT_NIR = 0.460 NL%ORIENT_TREE = 0.100 NL%ORIENT_GRASS = 0.000 NL%CLUMP_TREE = 0.800 NL%CLUMP_GRASS = 1.000 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! DECOMP_SCHEME -- This specifies the dependence of soil decomposition on temperature. ! ! 0. ED-2.0 default, the original exponential ! ! 1. Lloyd and Taylor (1994) model ! ! [[option 1 requires parameters to be set in xml]] ! !---------------------------------------------------------------------------------------! NL%DECOMP_SCHEME = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! H2O_PLANT_LIM -- this determines whether plant photosynthesis can be limited by ! ! soil moisture, the FSW, defined as FSW = Supply / (Demand + Supply). ! ! ! ! Demand is always the transpiration rates in case soil moisture is ! ! not limiting (the psi_0 term times LAI). The supply is determined ! ! by Kw * nplant * Broot * Available_Water, and the definition of ! ! available water changes depending on H2O_PLANT_LIM: ! ! 0. Force FSW = 1 (effectively available water is infinity). ! ! 1. Available water is the total soil water above wilting point, ! ! integrated across all layers within the rooting zone. ! ! 2. Available water is the soil water at field capacity minus ! ! wilting point, scaled by the so-called wilting factor: ! ! (psi(k) - (H - z(k)) - psi_wp) / (psi_fc - psi_wp) ! ! where psi is the matric potentital at layer k, z is the layer ! ! depth, H it the crown height and psi_fc and psi_wp are the ! ! matric potentials at wilting point and field capacity. ! !---------------------------------------------------------------------------------------! NL%H2O_PLANT_LIM = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are factors that control photosynthesis and respiration. ! ! Notice that some of them are relative values whereas others are absolute. ! ! ! ! VMFACT_C3 -- Factor multiplying the default Vm0 for C3 plants (1.0 = default). ! ! VMFACT_C4 -- Factor multiplying the default Vm0 for C4 plants (1.0 = default). ! ! MPHOTO_TRC3 -- Stomatal slope (M) for tropical C3 plants ! ! MPHOTO_TEC3 -- Stomatal slope (M) for conifers and temperate C3 plants ! ! MPHOTO_C4 -- Stomatal slope (M) for C4 plants. ! ! BPHOTO_BLC3 -- cuticular conductance for broadleaf C3 plants [umol/m2/s] ! ! BPHOTO_NLC3 -- cuticular conductance for needleleaf C3 plants [umol/m2/s] ! ! BPHOTO_C4 -- cuticular conductance for C4 plants [umol/m2/s] ! ! KW_GRASS -- Water conductance for trees, in m2/yr/kgC_root. This is used only ! ! when H2O_PLANT_LIM is not 0. ! ! KW_TREE -- Water conductance for grasses, in m2/yr/kgC_root. This is used only ! ! when H2O_PLANT_LIM is not 0. ! ! GAMMA_C3 -- The dark respiration factor (gamma) for C3 plants. Subtropical ! ! conifers will be scaled by GAMMA_C3 * 0.028 / 0.02 ! ! GAMMA_C4 -- The dark respiration factor (gamma) for C4 plants. ! ! D0_GRASS -- The transpiration control in gsw (D0) for ALL grasses. ! ! D0_TREE -- The transpiration control in gsw (D0) for ALL trees. ! ! ALPHA_C3 -- Quantum yield of ALL C3 plants. This is only applied when ! ! QUANTUM_EFFICIENCY_T = 0. ! ! ALPHA_C4 -- Quantum yield of C4 plants. This is always applied. ! ! KLOWCO2IN -- The coefficient that controls the PEP carboxylase limited rate of ! ! carboxylation for C4 plants. ! ! RRFFACT -- Factor multiplying the root respiration factor for ALL PFTs. ! ! (1.0 = default). ! ! GROWTHRESP -- The actual growth respiration factor (C3/C4 tropical PFTs only). ! ! (1.0 = default). ! ! LWIDTH_GRASS -- Leaf width for grasses, in metres. This controls the leaf boundary ! ! layer conductance (gbh and gbw). ! ! LWIDTH_BLTREE -- Leaf width for trees, in metres. This controls the leaf boundary ! ! layer conductance (gbh and gbw). This is applied to broadleaf trees ! ! only. ! ! LWIDTH_NLTREE -- Leaf width for trees, in metres. This controls the leaf boundary ! ! layer conductance (gbh and gbw). This is applied to conifer trees ! ! only. ! ! Q10_C3 -- Q10 factor for C3 plants (used only if IPHYSIOL is set to 2 or 3). ! ! Q10_C4 -- Q10 factor for C4 plants (used only if IPHYSIOL is set to 2 or 3). ! !---------------------------------------------------------------------------------------! NL%VMFACT_C3 = 1 NL%VMFACT_C4 = 1 NL%MPHOTO_TRC3 = 9 NL%MPHOTO_TEC3 = 7.2 NL%MPHOTO_C4 = 5.2 NL%BPHOTO_BLC3 = 10000 NL%BPHOTO_NLC3 = 1000 NL%BPHOTO_C4 = 10000 NL%KW_GRASS = 900 NL%KW_TREE = 600 NL%GAMMA_C3 = 0.0145 NL%GAMMA_C4 = 0.035 NL%D0_GRASS = 0.016 NL%D0_TREE = 0.016 NL%ALPHA_C3 = 0.08 NL%ALPHA_C4 = 0.055 NL%KLOWCO2IN = 4000 NL%RRFFACT = 1 NL%GROWTHRESP = 0.333 NL%LWIDTH_GRASS = 0.05 NL%LWIDTH_BLTREE = 0.1 NL%LWIDTH_NLTREE = 0.05 NL%Q10_C3 = 2.4 NL%Q10_C4 = 2.4 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! THETACRIT -- Leaf drought phenology threshold. The sign matters here: ! ! >= 0. -- This is the relative soil moisture above the wilting point ! ! below which the drought-deciduous plants will start shedding ! ! their leaves ! ! < 0. -- This is the soil potential in MPa below which the drought- ! ! -deciduous plants will start shedding their leaves. The wilt- ! ! ing point is by definition -1.5MPa, so make sure that the value ! ! is above -1.5. ! !---------------------------------------------------------------------------------------! NL%THETACRIT = -1.20 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! QUANTUM_EFFICIENCY_T -- Which quantum yield model should to use for C3 plants ! ! 0. Original ED-2.1, quantum efficiency is constant. ! ! 1. Quantum efficiency varies with temperature following ! ! Ehleringer (1978) polynomial fit. ! !---------------------------------------------------------------------------------------! NL%QUANTUM_EFFICIENCY_T = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! N_PLANT_LIM -- This controls whether plant photosynthesis can be limited by nitrogen. ! ! 0. No limitation ! ! 1. ED-2.1 nitrogen limitation model. ! !---------------------------------------------------------------------------------------! NL%N_PLANT_LIM = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! N_DECOMP_LIM -- This controls whether decomposition can be limited by nitrogen. ! ! 0. No limitation ! ! 1. ED-2.1 nitrogen limitation model. ! !---------------------------------------------------------------------------------------! NL%N_DECOMP_LIM = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following parameters adjust the fire disturbance in the model. ! ! INCLUDE_FIRE -- Which threshold to use for fires. ! ! 0. No fires; ! ! 1. (deprecated) Fire will be triggered with enough biomass and ! ! integrated ground water depth less than a threshold. Based on ! ! ED-1, the threshold assumes that the soil is 1 m, so deeper ! ! soils will need to be much drier to allow fires to happen and ! ! often will never allow fires. ! ! 2. Fire will be triggered with enough biomass and the total soil ! ! water at the top 75 cm falls below a threshold. ! ! FIRE_PARAMETER -- If fire happens, this will control the intensity of the disturbance ! ! given the amount of fuel (currently the total above-ground ! ! biomass). ! ! SM_FIRE -- This is used only when INCLUDE_FIRE = 2. The sign here matters. ! ! >= 0. - Minimum relative soil moisture above dry air of the top 1m ! ! that will prevent fires to happen. ! ! < 0. - Minimum mean soil moisture potential in MPa of the top 1m ! ! that will prevent fires to happen. The dry air soil ! ! potential is defined as -3.1 MPa, so make sure SM_FIRE is ! ! greater than this value. ! !---------------------------------------------------------------------------------------! NL%INCLUDE_FIRE = 0 NL%FIRE_PARAMETER = 0.5 NL%SM_FIRE = -1.40 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IANTH_DISTURB -- This flag controls whether to include anthropogenic disturbances ! ! such as land clearing, abandonment, and logging. ! ! 0. no anthropogenic disturbance. ! ! 1. use anthropogenic disturbance dataset. ! !---------------------------------------------------------------------------------------! NL%IANTH_DISTURB = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ICANTURB -- This flag controls the canopy roughness. ! ! 0. Based on Leuning et al. (1995), wind is computed using the similarity ! ! theory for the top cohort, and they are extinguished with cumulative ! ! LAI. If using CROWN_MOD 1 or 2, this will use local LAI and average ! ! by crown area. ! ! 1. The default ED-2.1 scheme, except that it uses the zero-plane ! ! displacement height. ! ! 2. This uses the method of Massman (1997) using constant drag and no ! ! sheltering factor. ! ! 3. This is also based on Massman (1997), but with the option of varying ! ! the drag and sheltering within the canopy. ! ! 4. Same as 0, but if finds the ground conductance following CLM ! ! technical note (equations 5.98-5.100). ! !---------------------------------------------------------------------------------------! NL%ICANTURB = 2 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ISFCLYRM -- Similarity theory model. The model that computes u*, T*, etc... ! ! 1. BRAMS default, based on Louis (1979). It uses empirical relations to ! ! estimate the flux based on the bulk Richardson number ! ! ! ! All models below use an interative method to find z/L, and the only change ! ! is the functional form of the psi functions. ! ! ! ! 2. Oncley and Dudhia (1995) model, based on MM5. ! ! 3. Beljaars and Holtslag (1991) model. Similar to 2, but it uses an alternative ! ! method for the stable case that mixes more than the OD95. ! ! 4. CLM (2004). Similar to 2 and 3, but they have special functions to deal with ! ! very stable and very stable cases. ! !---------------------------------------------------------------------------------------! NL%ISFCLYRM = 3 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IED_GRNDVAP -- Methods to find the ground -> canopy conductance. ! ! 0. Modified Lee Pielke (1992), adding field capacity, but using beta factor ! ! without the square, like in Noilhan and Planton (1989). This is the closest ! ! to the original ED-2.0 and LEAF-3, and it is also the recommended one. ! ! 1. Test # 1 of Mahfouf and Noilhan (1991) ! ! 2. Test # 2 of Mahfouf and Noilhan (1991) ! ! 3. Test # 3 of Mahfouf and Noilhan (1991) ! ! 4. Test # 4 of Mahfouf and Noilhan (1991) ! ! 5. Combination of test #1 (alpha) and test #2 (soil resistance). ! ! In all cases the beta term is modified so it approaches zero as soil moisture goes ! ! to dry air soil. ! !---------------------------------------------------------------------------------------! NL%IED_GRNDVAP = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are used to control the similarity theory model. For the ! ! meaning of these parameters, check Beljaars and Holtslag (1991). ! ! GAMM -- gamma coefficient for momentum, unstable case (dimensionless) ! ! Ignored when ISTAR = 1 ! ! GAMH -- gamma coefficient for heat, unstable case (dimensionless) ! ! Ignored when ISTAR = 1 ! ! TPRANDTL -- Turbulent Prandtl number ! ! Ignored when ISTAR = 1 ! ! RIBMAX -- maximum bulk Richardson number. ! ! LEAF_MAXWHC -- Maximum water that can be intercepted by leaves, in kg/m2leaf. ! !---------------------------------------------------------------------------------------! NL%GAMM = 13.0 NL%GAMH = 13.0 NL%TPRANDTL = 0.74 NL%RIBMAX = 0.50 NL%LEAF_MAXWHC = 0.11 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IPERCOL -- This controls percolation and infiltration. ! ! 0. Default method. Assumes soil conductivity constant and for the ! ! temporary surface water, it sheds liquid in excess of a 1:9 liquid- ! ! -to-ice ratio through percolation. Temporary surface water exists ! ! only if the top soil layer is at saturation. ! ! 1. Constant soil conductivity, and it uses the percolation model as in ! ! Anderson (1976) NOAA technical report NWS 19. Temporary surface ! ! water may exist after a heavy rain event, even if the soil doesn't ! ! saturate. Recommended value. ! ! 2. Soil conductivity decreases with depth even for constant soil moisture ! ! , otherwise it is the same as 1. ! !---------------------------------------------------------------------------------------! NL%IPERCOL = 1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the plant functional types (PFTs) that will be ! ! used in this simulation. ! ! ! ! INCLUDE_THESE_PFT -- a list containing all the PFTs you want to include in this run ! ! AGRI_STOCK -- which PFT should be used for agriculture ! ! (used only when IANTH_DISTURB = 1) ! ! PLANTATION_STOCK -- which PFT should be used for plantation ! ! (used only when IANTH_DISTURB = 1) ! ! ! ! PFT table ! !---------------------------------------------------------------------------------------! ! 1 - C4 grass | 9 - early temperate deciduous ! ! 2 - early tropical | 10 - mid temperate deciduous ! ! 3 - mid tropical | 11 - late temperate deciduous ! ! 4 - late tropical | 12:15 - agricultural PFTs ! ! 5 - temperate C3 grass | 16 - Subtropical C3 grass ! ! 6 - northern pines | (C4 grass with C3 photo). ! ! 7 - southern pines | 17 - "Araucaria" (non-optimised ! ! 8 - late conifers | Southern Pines). ! !---------------------------------------------------------------------------------------! NL%INCLUDE_THESE_PFT = 1,2,3,4,16 NL%AGRI_STOCK = 1 NL%PLANTATION_STOCK = 3 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! PFT_1ST_CHECK -- What to do if the initialisation file has a PFT that is not listed ! ! in INCLUDE_THESE_PFT (ignored if IED_INIT_MODE is -1 or 0) ! ! 0. Stop the run ! ! 1. Add the PFT in the INCLUDE_THESE_PFT list ! ! 2. Ignore the cohort ! !---------------------------------------------------------------------------------------! NL%PFT_1ST_CHECK = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the size of sub-polygon structures in ED-2. ! ! MAXSITE -- This is the strict maximum number of sites that each polygon can ! ! contain. Currently this is used only when the user wants to run ! ! the same polygon with multiple soil types. If there aren't that ! ! many different soil types with a minimum area (check MIN_SITE_AREA ! ! below), then the model will allocate just the amount needed. ! ! MAXPATCH -- If number of patches in a given site exceeds MAXPATCH, force patch ! ! fusion. If MAXPATCH is 0, then fusion will never happen. If ! ! MAXPATCH is negative, then the absolute value is used only during ! ! the initialization, and fusion will never happen again. Notice ! ! that if the patches are too different, then the actual number of ! ! patches in a site may exceed MAXPATCH. ! ! MAXCOHORT -- If number of cohorts in a given patch exceeds MAXCOHORT, force ! ! cohort fusion. If MAXCOHORT is 0, then fusion will never happen. ! ! If MAXCOHORT is negative, then the absolute value is used only ! ! during the initialization, and fusion will never happen again. ! ! Notice that if the cohorts are too different, then the actual ! ! number of cohorts in a patch may exceed MAXCOHORT. ! ! MIN_SITE_AREA -- This is the minimum fraction area of a given soil type that allows ! ! a site to be created (ignored if IED_INIT_MODE is set to 3). ! ! MIN_PATCH_AREA -- This is the minimum fraction area of a given soil type that allows ! ! a site to be created (ignored if IED_INIT_MODE is set to 3). ! !---------------------------------------------------------------------------------------! NL%MAXSITE = 1 NL%MAXPATCH = 10 NL%MAXCOHORT = 40 NL%MIN_SITE_AREA = 0.005 NL%MIN_PATCH_AREA = 0.005 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! ZROUGH -- constant roughness, in metres, if for all domain ! !---------------------------------------------------------------------------------------! NL%ZROUGH = 0.1 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Treefall disturbance parameters. ! ! TREEFALL_DISTURBANCE_RATE -- Sign-dependent treefall disturbance rate: ! ! > 0. usual disturbance rate, in 1/years; ! ! = 0. No treefall disturbance; ! ! < 0. Treefall will be added as a mortality rate (it ! ! will kill plants, but it won't create a new patch). ! ! TIME2CANOPY -- Minimum patch age for treefall disturbance to happen. ! ! If TREEFALL_DISTURBANCE_RATE = 0., this value will be ! ! ignored. If this value is different than zero, then ! ! TREEFALL_DISTURBANCE_RATE is internally adjusted so the ! ! average patch age is still 1/TREEFALL_DISTURBANCE_RATE ! !---------------------------------------------------------------------------------------! NL%TREEFALL_DISTURBANCE_RATE = 0.014 NL%TIME2CANOPY = 0.0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! RUNOFF_TIME -- In case a temporary surface water (TSW) is created, this is the "e- ! ! -folding lifetime" of the TSW in seconds due to runoff. If you don't ! ! want runoff to happen, set this to 0. ! !---------------------------------------------------------------------------------------! NL%RUNOFF_TIME = 3600.0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the minimum values of various velocities in the ! ! canopy. This is needed to avoid the air to be extremely still, or to avoid singular- ! ! ities. When defining the values, keep in mind that UBMIN >= UGBMIN >= USTMIN. ! ! ! ! UBMIN -- minimum wind speed at the top of the canopy air space [ m/s] ! ! UGBMIN -- minimum wind speed at the leaf level [ m/s] ! ! USTMIN -- minimum friction velocity, u*, in m/s. [ m/s] ! !---------------------------------------------------------------------------------------! NL%UBMIN = 0.65 NL%UGBMIN = 0.25 NL%USTMIN = 0.05 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Control parameters for printing to standard output. Any variable can be printed ! ! to standard output as long as it is one dimensional. Polygon variables have been ! ! tested, no gaurtantees for other hierarchical levels. Choose any variables that are ! ! defined in the variable table fill routine in ed_state_vars.f90. Choose the start ! ! and end index of the polygon,site,patch or cohort. It should work in parallel. The ! ! indices are global indices of the entire domain. The are printed out in rows of 10 ! ! columns each. ! ! ! ! IPRINTPOLYS -- 0. Do not print information to screen ! ! 1. Print polygon arrays to screen, use variables described below to ! ! determine which ones and how ! ! NPVARS -- Number of variables to be printed ! ! PRINTVARS -- List of variables to be printed ! ! PFMTSTR -- The standard fortran format for the prints. One format per variable ! ! IPMIN -- First polygon (absolute index) to be print ! ! IPMAX -- Last polygon (absolute index) to print ! !---------------------------------------------------------------------------------------! NL%IPRINTPOLYS = 0 NL%NPVARS = 1 NL%PRINTVARS = 'AVG_PCPG','AVG_CAN_TEMP','AVG_VAPOR_AC','AVG_CAN_SHV' NL%PFMTSTR = 'f10.8','f5.1','f7.2','f9.5' NL%IPMIN = 1 NL%IPMAX = 60 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! Variables that control the meteorological forcing. ! ! ! ! IMETTYPE -- Format of the meteorological dataset ! ! 0. ASCII (deprecated) ! ! 1. HDF5 ! ! ISHUFFLE -- How to choose an year outside the meterorological data range (see ! ! METCYC1 and METCYCF). ! ! 0. Sequentially cycle over years ! ! 1. Randomly pick the years, using the same sequence. This has worked ! ! with gfortran running in Mac OS X system, but it acts like option 2 ! ! when running ifort. ! ! 2. Randomly pick the years, choosing a different sequence each time ! ! the model is run. ! ! IMETCYC1 -- First year with meteorological information ! ! IMETCYCF -- Last year with meteorological information ! ! IMETAVG -- How the input radiation was originally averaged. You must tell this ! ! because ED-2.1 can make a interpolation accounting for the cosine of ! ! zenith angle. ! ! -1. I don't know, use linear interpolation. ! ! 0. No average, the values are instantaneous ! ! 1. Averages ending at the reference time ! ! 2. Averages beginning at the reference time ! ! 3. Averages centred at the reference time ! ! IMETRAD -- What should the model do with the input short wave radiation? ! ! 0. Nothing, use it as is. ! ! 1. Add them together, then use the SiB method to break radiation down ! ! into the four components (PAR direct, PAR diffuse, NIR direct, ! ! NIR diffuse). ! ! 2. Add then together, then use the method by Weiss and Norman (1985) ! ! to break radiation down to the four components. ! ! 3. Gloomy -- All radiation goes to diffuse. ! ! 4. Sesame street -- all radiation goes to direct, except at night. ! ! INITIAL_CO2 -- Initial value for CO2 in case no CO2 is provided at the meteorological ! ! driver dataset [Units: µmol/mol] ! !---------------------------------------------------------------------------------------! NL%IMETTYPE = 1 NL%ISHUFFLE = 0 NL%METCYC1 = 2000 NL%METCYCF = 2003 NL%IMETAVG = 1 NL%IMETRAD = 2 NL%INITIAL_CO2 = 378.0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables control the phenology prescribed from observations: ! ! ! ! IPHENYS1 -- First year for spring phenology ! ! IPHENYSF -- Final year for spring phenology ! ! IPHENYF1 -- First year for fall/autumn phenology ! ! IPHENYFF -- Final year for fall/autumn phenology ! ! PHENPATH -- path and prefix of the prescribed phenology data. ! ! ! ! If the years don't cover the entire simulation period, they will be recycled. ! !---------------------------------------------------------------------------------------! NL%IPHENYS1 = 1992 NL%IPHENYSF = 2003 NL%IPHENYF1 = 1992 NL%IPHENYFF = 2003 NL%PHENPATH = '/n/moorcroft_data/data/ed2_data/phenology/phenology' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! These are some additional configuration files. ! ! IEDCNFGF -- XML file containing additional parameter settings. If you don't have ! ! one, leave it empty ! ! EVENT_FILE -- file containing specific events that must be incorporated into the ! ! simulation. ! ! PHENPATH -- path and prefix of the prescribed phenology data. ! !---------------------------------------------------------------------------------------! NL%IEDCNFGF = 'config.xml' NL%EVENT_FILE = 'myevents.xml' !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! The following variables are used to control the detailed output for debugging ! ! purposes. ! ! ! ! IDETAILED -- This flag controls the possible detailed outputs, mostly used for ! ! debugging purposes. Notice that this doesn't replace the normal debug- ! ! ger options, the idea is to provide detailed output to check bad ! ! assumptions. The options are additive, and the indices below represent ! ! the different types of output: ! ! ! ! 1 -- Detailed budget (every DTLSM) ! ! 2 -- Detailed photosynthesis (every DTLSM) ! ! 4 -- Detailed output from the integrator (every HDID) ! ! 8 -- Thermodynamic bounds for sanity check (every DTLSM) ! ! 16 -- Daily error stats (which variable caused the time step to shrink) ! ! 32 -- Allometry parameters, and minimum and maximum sizes ! ! (two files, only at the beginning) ! ! ! ! In case you don't want any detailed output (likely for most runs), set ! ! IDETAILED to zero. In case you want to generate multiple outputs, add ! ! the number of the sought options: for example, if you want detailed ! ! photosynthesis and detailed output from the integrator, set IDETAILED ! ! to 6 (2 + 4). Any combination of the above outputs is acceptable, al- ! ! though all but the last produce a sheer amount of txt files, in which ! ! case you may want to look at variable PATCH_KEEP. It is also a good ! ! idea to set IVEGT_DYNAMICS to 0 when using the first five outputs. ! ! ! ! ! ! PATCH_KEEP -- This option will eliminate all patches except one from the initial- ! ! isation. This is only used when one of the first five types of ! ! detailed output is active, otherwise it will be ignored. Options are: ! ! -2. Keep only the patch with the lowest potential LAI ! ! -1. Keep only the patch with the highest potential LAI ! ! 0. Keep all patches. ! ! > 0. Keep the patch with the provided index. In case the index is ! ! not valid, the model will crash. ! !---------------------------------------------------------------------------------------! NL%IDETAILED = 0 NL%PATCH_KEEP = 0 !---------------------------------------------------------------------------------------! !---------------------------------------------------------------------------------------! ! IOPTINPT -- Optimization configuration. (Currently not used) ! !---------------------------------------------------------------------------------------! !NL%IOPTINPT = '' !---------------------------------------------------------------------------------------! NL%IOOUTPUT = 3 NL%IADD_SITE_MEANS = 0 NL%IADD_PATCH_MEANS = 0 NL%IADD_COHORT_MEANS = 0 NL%GROWTH_RESP_SCHEME = 0 NL%STORAGE_RESP_SCHEME = 0 NL%PLANT_HYDRO_SCHEME = 0 NL%ISTOMATA_SCHEME = 0 NL%ISTRUCT_GROWTH_SCHEME = 0 NL%TRAIT_PLASTICITY_SCHEME = 0 NL%IDDMORT_SCHEME = 0 NL%CBR_SCHEME = 0 NL%DDMORT_CONST = 0 NL%ICANRAD = 1 NL%DT_CENSUS = 60 NL%YR1ST_CENSUS = 2000 NL%MON1ST_CENSUS = 6 NL%MIN_RECRUIT_DBH = 50 $END !==========================================================================================! !==========================================================================================! Convert the docker image to singularity singularity pull docker://pecan/model-ed2-git Finally you can run the singularity image singularity exec -B ed_inputs:/data/ed_inputs -B faoOLD:/data/faoOLD -B oge2OLD:/data/oge2OLD -B sites:/data/sites -B testrun.s83:/data/testrun.s83 --pwd /data/testrun.s83 ./model-ed2-git.simg ed2.git -s Note that the -B option will mount the folder into the singularity image as the second argument (afther the :) The ed2.git command is started with the -s which will run it in single mode, and not initialize and use MPI. Once the model is finished the outputs should be available under testrun.s83. The example ED2IN file is not 100% correct and will result in the following error: output of (failed) run (click to expand) +---------------- MPI parallel info: --------------------+ + - Machnum = 0 + - Machsize = 1 +---------------- OMP parallel info: --------------------+ + - thread use: 1 + - threads max: 1 + - cpu use: 1 + - cpus max: 1 + Note: Max vals are for node, not sockets. +--------------------------------------------------------+ Reading namelist information Copying namelist !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!! WARNING! WARNING! WARNING! WARNING! WARNING! !!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -> Outfast cannot be less than frqfast. Oufast was redefined to 3600. seconds. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!! WARNING! WARNING! WARNING! WARNING! WARNING! !!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -> Outstate cannot be different than frqstate when unitstate is set to 3 (years). Oustate was set to 1. years. Oustate was redefined to 1. years. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +------------------------------------------------------------+ | Ecosystem Demography Model, version 2.2 +------------------------------------------------------------+ | Input namelist filename is ED2IN | | Single process execution on INITIAL run. +------------------------------------------------------------+ => Generating the land/sea mask. /data/oge2OLD/OGE2_HEADER -> Getting file: /data/oge2OLD/OGE2_30S060W.h5... + Work allocation, node 1; + Polygon array allocation, node 1; + Memory successfully allocated on none 1; [+] Load_Ed_Ecosystem_Params... ---------------------------------------- Treefall disturbance parameters: - LAMBDA_REF = 1.40000E-02 - LAMBDA_EFF = 1.40000E-02 - TIME2CANOPY = 0.00000E+00 ---------------------------------------- [+] Checking for XML config... ********************************************* ** WARNING! ** ** ** ** XML file wasn't found. Using default ** ** parameters in ED. ** ** (You provided config.xml). ** ** ********************************************* [+] Alloc_Soilgrid... [+] Set_Polygon_Coordinates... [+] Sfcdata_ED... [+] Load_Ecosystem_State... + Doing sequential initialization over nodes. + Initializing from ED restart file. Node: 001 [-] filelist_f: Checking prefix: /data/sites/Santarem_Km83/s83_default. + Showing first 10 files: [-] File #: 1 /data/sites/Santarem_Km83/s83_default.lat-3.018lon-54.971.css [-] File #: 2 /data/sites/Santarem_Km83/s83_default.lat-3.018lon-54.971.pss Using patch file: /data/sites/Santarem_Km83/s83_default.lat-3.018lon-54.971.pss Using cohort file: /data/sites/Santarem_Km83/s83_default.lat-3.018lon-54.971.css + Initializing phenology. Node: 001 - Reading thermal sums. + Initializing anthropogenic disturbance forcing. Node: 001 -------------------------------------------------------- Soil information: Polygon name : ts83 Longitude : -54.971 Latitude : -3.018 Prescribed sand and clay : T # of sites : 1 Site : 1 - Type : 16 - Clay fraction = 5.90000E-01 - Sand fraction = 3.90000E-01 - Silt fraction = 2.00000E-02 - SLBS = 1.22460E+01 - SLPOTS = -1.52090E-01 - SLCONS = 2.30320E-06 - Dry air soil = 2.29248E-01 - Wilting point = 2.43249E-01 - Field capacity = 3.24517E-01 - Saturation = 4.27790E-01 - Heat capacity = 1.30601E+06 -------------------------------------------------------- [+] Init_Met_Drivers... [+] Read_Met_Drivers_Init... ------------------------------ - METCYC1 = 2000 - METCYCF = 2003 - NYEARS = 2 ------------------------------ IYEAR YEAR_USE 1 2001 2 2002 ------------------------------ [+] Update_met_drivers... [+] Ed_Init_Atm... Total count in node 1 for grid 1 : POLYGONS= 1 SITES= 1 PATCHES= 18 COHORTS= 1753 Grid: 1 Poly: 1 Lon: -54.9710 Lat: -3.0180 Nplants: 0.73 Avg. LAI: 4.45 NPatches: 18 NCohorts: 660 [+] initHydrology... initHydrology | mynum= 1 ngrids= 1 mpolys= 1 msites= 1 Allocated | mynum= 1 ngrids= 1 mpolys= 1 msites= 1 Updated | mynum= 1 ngrids= 1 mpolys= 1 msites= 1 Deallocated | mynum= 1 ngrids= 1 mpolys= 1 msites= 1 [+] Filltab_Alltypes... [+] Finding frqsum... [+] Loading obstime_list File /nowhere not found! Specify OBSTIME_DB properly in ED namelist. :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -------------------------------------------------------------- !!! FATAL ERROR !!! -------------------------------------------------------------- ---> File: ed_init.F90 ---> Subroutine: read_obstime ---> Reason: OBSTIME_DB not found! -------------------------------------------------------------- ED execution halts (see previous error message)... -------------------------------------------------------------- STOP fatal_error "],["sibcasa.html", "35 SiBCASA", " 35 SiBCASA Model Information Home Page Source Code Contact Kevin Schaefer at (kevin.schaefer[at]nsidc.org License Authors Kevin Shaefer PEcAn Integration Tony Gardella, Yasin Elshorbany Introduction SiBCASA was created from the combination of the Carnegie-Ames_Stanford Approach (CASA) and the Simple Biosphere(SiB) model.They have added other features, including dynamic allocation of GPP for the growth and maintance of leaves, roots, and wood andd explicit calculation of autotrophic respiration. Schaefer, Kevin, et al. “The combined Simple Biosphere/Carnegie-Ames-Stanford Approach (SiBCASA) Model.” J. Geophys. Res 113: G03034. PEcAn configuration file additions Model specific input files Software libraries- PGI 16.5 or later https://www.pgroup.com/products/community.htm NETCDF 3.6.0 Input - Driver data: Made to use CRUNCEP data by default. Param File Initial Condition File Model configuration files The namel_sibdrv is the namelist file that controls input and output file locations. Installation notes Update $PATH to the netcdf library version 3.6 (compiled with the same PGI Fortran compiler) in: PEcAn/sim_setup/Makefile & PEcAn/SiB/sibcasa_arctic4/Makefile Change paths in: Line 379: /SiB/sibcasa_ arctic4/read_ti.F90 Line 260, 273, 286: /SiB/sibcasa_ arctic4/sibdrv_read_ncep2.F90 Update $PATH variables in: /SiB/sibcasa_arctic4/sibmerg.in IMPORTANT: update “fmt_type=‘sing_proc’” even if running on parallel computing system, since the file names will otherwise be incompatible with the outputted data..check Compile code: in ~/SiB/sibcasa_arctic4/ Make clean Make VM Currently Not Avialable on the VM "],["404.html", "Page not found", " Page not found The page you requested cannot be found (perhaps it was moved or renamed). You may want to try searching to find the page's new location, or use the table of contents to find the page you are looking for. "]]
diff --git a/develop/selected-publications.html b/develop/selected-publications.html
index 142da238b..812dc2c87 100644
--- a/develop/selected-publications.html
+++ b/develop/selected-publications.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/selecting-plant-functional-types-pfts-and-other-parameter-groupings..html b/develop/selecting-plant-functional-types-pfts-and-other-parameter-groupings..html
index 9fce1b544..4c668d27d 100644
--- a/develop/selecting-plant-functional-types-pfts-and-other-parameter-groupings..html
+++ b/develop/selecting-plant-functional-types-pfts-and-other-parameter-groupings..html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/sensitivity-analysis-variance-decomposition.html b/develop/sensitivity-analysis-variance-decomposition.html
index f37435b52..90af1b667 100644
--- a/develop/sensitivity-analysis-variance-decomposition.html
+++ b/develop/sensitivity-analysis-variance-decomposition.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/set-up.html b/develop/set-up.html
index 244052372..7c2bb4e0a 100644
--- a/develop/set-up.html
+++ b/develop/set-up.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/settings-1.html b/develop/settings-1.html
index 493d3fc73..1422a50c3 100644
--- a/develop/settings-1.html
+++ b/develop/settings-1.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/settings-configured-analyses.html b/develop/settings-configured-analyses.html
index fc107b221..e4d17f6f3 100644
--- a/develop/settings-configured-analyses.html
+++ b/develop/settings-configured-analyses.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/sharing-data.html b/develop/sharing-data.html
index fc7784150..35a3b4da4 100644
--- a/develop/sharing-data.html
+++ b/develop/sharing-data.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/shiny-setup.html b/develop/shiny-setup.html
index 669425a72..26a3b4859 100644
--- a/develop/shiny-setup.html
+++ b/develop/shiny-setup.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/shiny.html b/develop/shiny.html
index 7fa00aa4d..68b72dd73 100644
--- a/develop/shiny.html
+++ b/develop/shiny.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/sibcasa.html b/develop/sibcasa.html
index 45211c1c2..df8f14a49 100644
--- a/develop/sibcasa.html
+++ b/develop/sibcasa.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/special-case-geo.bu.html b/develop/special-case-geo.bu.html
index 6a001b6e8..d1ba64205 100644
--- a/develop/special-case-geo.bu.html
+++ b/develop/special-case-geo.bu.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/ssh-authentication-password-vs.-ssh-key.html b/develop/ssh-authentication-password-vs.-ssh-key.html
index 49720db32..61370f4c6 100644
--- a/develop/ssh-authentication-password-vs.-ssh-key.html
+++ b/develop/ssh-authentication-password-vs.-ssh-key.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/ssh-tunneling.html b/develop/ssh-tunneling.html
index 52fda9a91..170dd1212 100644
--- a/develop/ssh-tunneling.html
+++ b/develop/ssh-tunneling.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/ssh-tunnels-and-pecan.html b/develop/ssh-tunnels-and-pecan.html
index 4788292e8..90e7dcbed 100644
--- a/develop/ssh-tunnels-and-pecan.html
+++ b/develop/ssh-tunnels-and-pecan.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/ssh-vm-bety.html b/develop/ssh-vm-bety.html
index 017ecd32c..ebc943a16 100644
--- a/develop/ssh-vm-bety.html
+++ b/develop/ssh-vm-bety.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/ssh-vm.html b/develop/ssh-vm.html
index 475f33315..e89782d9e 100644
--- a/develop/ssh-vm.html
+++ b/develop/ssh-vm.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/standalone-tools-modules.html b/develop/standalone-tools-modules.html
index a45920413..cd69ce576 100644
--- a/develop/standalone-tools-modules.html
+++ b/develop/standalone-tools-modules.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/start-runs.html b/develop/start-runs.html
index 5cc143fd5..d72670b98 100644
--- a/develop/start-runs.html
+++ b/develop/start-runs.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/summer-courses-workshops.html b/develop/summer-courses-workshops.html
index 4c568a4c2..eeb193b39 100644
--- a/develop/summer-courses-workshops.html
+++ b/develop/summer-courses-workshops.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/tasks.html b/develop/tasks.html
index ece2d1834..323741fc7 100644
--- a/develop/tasks.html
+++ b/develop/tasks.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/testing-the-shiny-server.html b/develop/testing-the-shiny-server.html
index d5c477cd8..5c3f7fe93 100644
--- a/develop/testing-the-shiny-server.html
+++ b/develop/testing-the-shiny-server.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/the-pecan-docker-install-process-in-detail.html b/develop/the-pecan-docker-install-process-in-detail.html
index 4673e4470..6b6f761d1 100644
--- a/develop/the-pecan-docker-install-process-in-detail.html
+++ b/develop/the-pecan-docker-install-process-in-detail.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/thredds-setup.html b/develop/thredds-setup.html
index ab76ee411..6ff19e02d 100644
--- a/develop/thredds-setup.html
+++ b/develop/thredds-setup.html
@@ -23,7 +23,7 @@
-
+
diff --git a/develop/time-standard.html b/develop/time-standard.html
index b7c92e599..2e7b33320 100644
--- a/develop/time-standard.html
+++ b/develop/time-standard.html
@@ -23,7 +23,7 @@
-
+
@@ -806,8 +806,8 @@
diff --git a/develop/todo.html b/develop/todo.html
index f8459d6df..b24cacbd8 100644
--- a/develop/todo.html
+++ b/develop/todo.html
@@ -23,7 +23,7 @@