From be49e1cef1291306c794b2bb48158a2ce57a24bc Mon Sep 17 00:00:00 2001 From: Taher Chegini Date: Sat, 25 Nov 2023 19:03:11 -0500 Subject: [PATCH] Initial commit --- .codecov.yml | 13 + .deepsource.toml | 13 + .git_archival.txt | 4 + .gitattributes | 4 + .github/FUNDING.yml | 1 + .github/ISSUE_TEMPLATE/bugreport.yml | 74 +++++ .github/ISSUE_TEMPLATE/config.yml | 8 + .github/ISSUE_TEMPLATE/newfeature.yml | 35 +++ .github/PULL_REQUEST_TEMPLATE.md | 6 + .github/dependabot.yml | 8 + .github/workflows/codeql-analysis.yml | 35 +++ .github/workflows/pre-commit.yml | 17 + .github/workflows/release.yml | 88 ++++++ .github/workflows/test.yml | 69 ++++ .gitignore | 112 +++++++ .pre-commit-config.yaml | 122 ++++++++ AUTHORS.rst | 13 + CITATION.cff | 38 +++ CODE_OF_CONDUCT.rst | 87 ++++++ CONTRIBUTING.rst | 155 +++++++++ HISTORY.rst | 8 + LICENSE | 6 +- MANIFEST.in | 11 + README.rst | 331 ++++++++++++++++++++ ci/requirements/environment-dev.yml | 167 ++++++++++ ci/requirements/environment.yml | 89 ++++++ conftest.py | 18 ++ noxfile.py | 126 ++++++++ pygridmet/__init__.py | 31 ++ pygridmet/cli.py | 207 ++++++++++++ pygridmet/core.py | 417 +++++++++++++++++++++++++ pygridmet/exceptions.py | 104 ++++++ pygridmet/print_versions.py | 172 ++++++++++ pygridmet/py.typed | 0 pygridmet/pygridmet.py | 434 ++++++++++++++++++++++++++ pyproject.toml | 273 ++++++++++++++++ tests/test_exceptions.py | 169 ++++++++++ tests/test_pygridmet.py | 142 +++++++++ 38 files changed, 3606 insertions(+), 1 deletion(-) create mode 100644 .codecov.yml create mode 100644 .deepsource.toml create mode 100644 .git_archival.txt create mode 100644 .gitattributes create mode 100644 .github/FUNDING.yml create mode 100644 .github/ISSUE_TEMPLATE/bugreport.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/newfeature.yml create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/codeql-analysis.yml 
create mode 100644 .github/workflows/pre-commit.yml create mode 100644 .github/workflows/release.yml create mode 100644 .github/workflows/test.yml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 AUTHORS.rst create mode 100644 CITATION.cff create mode 100644 CODE_OF_CONDUCT.rst create mode 100644 CONTRIBUTING.rst create mode 100644 HISTORY.rst create mode 100644 MANIFEST.in create mode 100644 README.rst create mode 100644 ci/requirements/environment-dev.yml create mode 100644 ci/requirements/environment.yml create mode 100644 conftest.py create mode 100644 noxfile.py create mode 100644 pygridmet/__init__.py create mode 100644 pygridmet/cli.py create mode 100644 pygridmet/core.py create mode 100644 pygridmet/exceptions.py create mode 100644 pygridmet/print_versions.py create mode 100644 pygridmet/py.typed create mode 100644 pygridmet/pygridmet.py create mode 100644 pyproject.toml create mode 100644 tests/test_exceptions.py create mode 100644 tests/test_pygridmet.py diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..392261f --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,13 @@ +codecov: + branch: main + +coverage: + status: + project: + default: + informational: true + patch: + default: + informational: true + +comment: false diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 0000000..faf64f9 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,13 @@ +exclude_patterns = [ + "tests/**", + "*/print_versions.py", + "*/__init__.py" +] +version = 1 + +[[analyzers]] +enabled = true +name = "python" + +[analyzers.meta] +runtime_version = "3.x.x" diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 0000000..8fb235d --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,4 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ +ref-names: $Format:%D$ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 
0000000..1c5d576 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +# reduce the number of merge conflicts +HISTORY.rst merge=union +# allow installing from git archives +.git_archival.txt export-subst diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..b7939e2 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [cheginit] diff --git a/.github/ISSUE_TEMPLATE/bugreport.yml b/.github/ISSUE_TEMPLATE/bugreport.yml new file mode 100644 index 0000000..3417639 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bugreport.yml @@ -0,0 +1,74 @@ +name: 🐛 Bug Report +description: File a bug report to help us improve +labels: [bug, needs triage] +body: +- type: textarea + id: what-happened + attributes: + label: What happened? + description: | + Thanks for reporting a bug! Please describe what you were trying to get done. + Tell us what happened, what went wrong. + validations: + required: true + +- type: textarea + id: what-did-you-expect-to-happen + attributes: + label: What did you expect to happen? + description: | + Describe what you expected to happen. + validations: + required: false + +- type: textarea + id: sample-code + attributes: + label: Minimal Complete Verifiable Example + description: | + Minimal, self-contained copy-pastable example that demonstrates the issue. This will be automatically formatted into code, so no need for markdown backticks. + render: Python + +- type: checkboxes + id: mvce-checkboxes + attributes: + label: MVCE confirmation + description: | + Please confirm that the bug report is in an excellent state, so we can understand & fix it quickly & efficiently. For more details, check out: + + - [Minimal Complete Verifiable Examples](https://stackoverflow.com/help/mcve) + - [Craft Minimal Bug Reports](http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) + + options: + - label: Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue. 
+ - label: Complete example — the example is self-contained, including all data and the text of any traceback. + - label: New issue — a search of GitHub Issues suggests this is not a duplicate. + +- type: textarea + id: log-output + attributes: + label: Relevant log output + description: Please copy and paste any relevant output/traceback. This will be automatically formatted into code, so no need for markdown backticks. + render: Python + +- type: textarea + id: extra + attributes: + label: Anything else we need to know? + description: | + Please describe any other information you want to share. + +- type: textarea + id: show-versions + attributes: + label: Environment + description: | + Paste the output of `pygridmet.show_versions()` between the `
` tags, leaving an empty line following the opening tag. + value: | +
+ + + +
+ validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..581cb38 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: +- name: 💡 Ideas & Discussion + url: https://github.com/hyriver/hyriver.github.io/discussions/categories/ideas + about: Do you have an idea how to improve HyRiver? Feel free to post it to the discussion forum that allows voting for other users. +- name: ⁉️ Help & Support + url: https://github.com/hyriver/hyriver.github.io/discussions/categories/q-a + about: Need help with installation or usage of HyRiver? Please use the discussion forum. diff --git a/.github/ISSUE_TEMPLATE/newfeature.yml b/.github/ISSUE_TEMPLATE/newfeature.yml new file mode 100644 index 0000000..b27ede6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/newfeature.yml @@ -0,0 +1,35 @@ +name: 💡 Feature Request +description: Suggest an idea for HyRiver +labels: [enhancement] +body: +- type: textarea + id: description + attributes: + label: Is your feature request related to a problem? + description: | + Please do a quick search of existing issues to make sure that this has not been asked before. + Please provide a clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + validations: + required: true +- type: textarea + id: solution + attributes: + label: Describe the solution you'd like + description: | + A clear and concise description of what you want to happen. +- type: textarea + id: alternatives + attributes: + label: Describe alternatives you've considered + description: | + A clear and concise description of any alternative solutions or features you've considered. + validations: + required: false +- type: textarea + id: additional-context + attributes: + label: Additional context + description: | + Add any other context about the feature request here. 
+ validations: + required: false diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..b71fd00 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,6 @@ + + + - [ ] Closes #xxxx + - [ ] Tests added and `nox` passes. + - [ ] Passes `pre-commit run --all-files` + - [ ] Changes and the contributor name are documented in `HISTORY.rst`. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..509bd5f --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: +- package-ecosystem: github-actions + directory: / + schedule: + interval: daily + commit-message: + prefix: 'BOT: [skip ci] ' diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 0000000..93e6b28 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,35 @@ +name: CodeQL + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + - cron: 30 1 * * 0 + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + CodeQL-Build: + runs-on: ubuntu-latest + + permissions: + security-events: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: python + + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..312d732 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,17 @@ +name: Linting + +on: + pull_request: + workflow_dispatch: # allows you to trigger manually + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: 
actions/checkout@v4 + - uses: excitedleigh/setup-nox@v2.1.0 + - run: nox -s pre-commit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..c3d73c6 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,88 @@ +name: Publish + +on: + push: + tags: + - v* + workflow_dispatch: # allows you to trigger manually + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + changelog: + name: Create Release Notes + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Generate Changelog + run: |- + echo "Release Notes" > ${{ github.workflow }}-CHANGELOG.rst + echo "-------------" >> ${{ github.workflow }}-CHANGELOG.rst + PAT="^---" + L1=$(grep -n $PAT HISTORY.rst | sed -n 1p | cut -d ":" -f 1) + L2=$(grep -n $PAT HISTORY.rst | sed -n 2p | cut -d ":" -f 1) + awk "NR > $L1 && NR < $L2 - 1" HISTORY.rst >> ${{ github.workflow }}-CHANGELOG.rst + - name: Convert rst to md + uses: docker://pandoc/core + with: + args: >- + ${{ github.workflow }}-CHANGELOG.rst + --wrap=none + -t markdown + -o ${{ github.workflow }}-CHANGELOG.md + - name: Remove extra spaces + run: |- + sed -i 's/- /- /g' ${{ github.workflow }}-CHANGELOG.md + - name: Github Release + uses: softprops/action-gh-release@v1 + if: startsWith(github.ref, 'refs/tags/') + with: + body_path: ${{ github.workflow }}-CHANGELOG.md + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + build: + name: Create Release + runs-on: ubuntu-latest + env: + REPO_NAME: ${{ github.event.repository.name }} + name: pypi + url: https://pypi.org/p/${{ github.event.repository.name }} + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@master + with: + python-version: 3.8 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools setuptools-scm wheel twine check-manifest + + - 
name: Build dist and wheel + run: | + git clean -xdf + git restore -SW . + python -m build --sdist --wheel . + + - name: Check built artifacts + run: | + python -m twine check dist/* + pwd + if [ -f dist/${REPO_NAME}-0.0.0.tar.gz ]; then + echo "❌ INVALID VERSION NUMBER" + exit 1 + else + echo "✅ Looks good" + fi + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@v1.8.10 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..2f499ee --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,69 @@ +name: CI + +on: + push: + branches: + - '**' + tags-ignore: + - '**' + pull_request: + branches: + - '**' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + name: python ${{ matrix.python-version }}, ${{ matrix.os }} + runs-on: ${{ matrix.os }} + env: + REPO_NAME: ${{ github.event.repository.name }} + defaults: + run: + shell: bash -l {0} + strategy: + matrix: + python-version: [3.8, '3.11'] + os: [ubuntu-latest, macos-latest, windows-latest] + + steps: + - uses: actions/checkout@v4 + - name: Set environment variables + run: |- + echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + echo "CONDA_ENV_FILE=ci/requirements/environment.yml" >> $GITHUB_ENV + echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV + - name: Setup micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: ${{ env.CONDA_ENV_FILE }} + environment-name: ${{ env.REPO_NAME }}-tests + create-args: >- + python=${{ env.PYTHON_VERSION }} + cache-environment: true + cache-environment-key: ${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}} + - name: Install error reporter + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' + run: | + python -m pip install pytest-github-actions-annotate-failures + - name: Install the package + run: |- + python -m pip
install --no-deps . + - name: Version info + run: |- + NAME="$(echo ${REPO_NAME} | tr - _)" + python -c "import ${NAME}; ${NAME}.show_versions()" + - name: Run pytest + run: |- + pytest + - name: Run codecov and upload the report + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests + env_vars: RUNNER_OS,PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1b6e332 --- /dev/null +++ b/.gitignore @@ -0,0 +1,112 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# IDE settings +.vscode/ + +# logs +.nvimlog +data/ +tags* +cache +.DS_STORE diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..83d0a3d --- 
/dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,122 @@ +repos: +- repo: https://github.com/hadialqattan/pycln + rev: v2.4.0 + hooks: + - id: pycln + name: Find and remove unused import statements with pycln + args: [--config=pyproject.toml] + +- repo: https://github.com/bwhmather/ssort + rev: v0.11.6 + hooks: + - id: ssort + name: Sort top level statements with ssort + +- repo: https://github.com/MarcoGorelli/absolufy-imports + rev: v0.3.1 + hooks: + - id: absolufy-imports + name: Convert relative imports to absolute with absolufy-imports + +- repo: https://github.com/Instagram/Fixit + rev: v2.1.0 + hooks: + - id: fixit-fix + +- repo: https://github.com/psf/black + rev: 23.11.0 + hooks: + - id: black + name: Autoformat with black + +- repo: https://github.com/asottile/blacken-docs + rev: 1.16.0 + hooks: + - id: blacken-docs + name: Autoformat codes in docstrings with blacken-docs + additional_dependencies: [black] + args: [-t, py38, -l, '100'] + +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.1.6 + hooks: + - id: ruff + name: Linting with Ruff + args: [--fix, --exit-non-zero-on-fix] + +- repo: https://github.com/PyCQA/doc8 + rev: v1.1.1 + hooks: + - id: doc8 + name: Check documentation formats with doc8 + args: [--max-line-length, '100'] + +- repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + name: Check common misspellings in text files with codespell. 
+ additional_dependencies: + - tomli + +- repo: https://github.com/dosisod/refurb + rev: v1.24.0 + hooks: + - id: refurb + name: Modernizing Python codebases using Refurb + additional_dependencies: + - numpy + +- repo: https://github.com/tox-dev/pyproject-fmt + rev: 1.5.1 + hooks: + - id: pyproject-fmt + name: Apply a consistent format to pyproject.toml + +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-added-large-files + args: [--maxkb=50000] + - id: mixed-line-ending + args: [--fix=lf] + - id: check-ast + - id: check-builtin-literals + - id: check-case-conflict + - id: check-docstring-first + - id: check-shebang-scripts-are-executable + - id: check-merge-conflict + - id: check-json + - id: check-toml + - id: check-xml + - id: check-yaml + - id: debug-statements + - id: destroyed-symlinks + - id: detect-private-key + - id: end-of-file-fixer + exclude: ^LICENSE|\.(html|csv|txt|svg|py)$ + - id: pretty-format-json + args: [--autofix, --no-ensure-ascii, --no-sort-keys] + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + exclude: \.(html|svg)$ + +- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks + rev: v2.11.0 + hooks: + - id: pretty-format-yaml + args: [--autofix, --indent, '2'] + +- repo: local + hooks: + - id: future-annotations + name: import annotations from __future__ + entry: from __future__ import annotations + language: pygrep + args: [--negate] + files: ^pygridmet/ + types: [python] + exclude: | + (?x) + /(__init__\.py)|(conftest\.py)$ + |/tests/ diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 0000000..f0d9e9a --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,13 @@ +================ +Development Team +================ + +Lead +---- + +* `Taher Chegini `__ + +Contributors +------------ + +None yet. Why not be the first?
diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..9e139cb --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,38 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Chegini" + given-names: "Taher" + orcid: "https://orcid.org/0000-0002-5430-6000" +- family-names: "Li" + given-names: "Hong-Yi" + orcid: "https://orcid.org/0000-0002-9807-3851" +- family-names: "Leung" + given-names: "L. Ruby" + orcid: "https://orcid.org/0000-0002-3221-9467" +title: "HyRiver: Hydroclimate Data Retriever" +version: 0.11 +doi: 10.21105/joss.03175 +date-released: 2021-10-27 +url: "https://github.com/cheginit/HyRiver" +preferred-citation: + type: article + authors: + - family-names: "Chegini" + given-names: "Taher" + orcid: "https://orcid.org/0000-0002-5430-6000" + - family-names: "Li" + given-names: "Hong-Yi" + orcid: "https://orcid.org/0000-0002-9807-3851" + - family-names: "Leung" + given-names: "L. Ruby" + orcid: "https://orcid.org/0000-0002-3221-9467" + doi: "10.21105/joss.03175" + journal: "Journal of Open Source Software" + month: 10 + start: 1 + end: 3 + title: "HyRiver: Hydroclimate Data Retriever" + issue: 66 + volume: 6 + year: 2021 diff --git a/CODE_OF_CONDUCT.rst b/CODE_OF_CONDUCT.rst new file mode 100644 index 0000000..07bb7e2 --- /dev/null +++ b/CODE_OF_CONDUCT.rst @@ -0,0 +1,87 @@ +Contributor Covenant Code of Conduct +==================================== + +Our Pledge +---------- + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our +project and our community a harassment-free experience for everyone, +regardless of age, body size, disability, ethnicity, sex +characteristics, gender identity and expression, level of experience, +education, socio-economic status, nationality, personal appearance, +race, religion, or sexual identity and orientation. 
+ +Our Standards +------------- + +Examples of behavior that contributes to creating a positive environment +include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery and unwelcome sexual + attention or advances +- Trolling, insulting/derogatory comments, and personal or political + attacks +- Public or private harassment +- Publishing others' private information, such as a physical or + electronic address, without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +Our Responsibilities +-------------------- + +Project maintainers are responsible for clarifying the standards of +acceptable behavior and are expected to take appropriate and fair +corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, +or reject comments, commits, code, wiki edits, issues, and other +contributions that are not aligned to this Code of Conduct, or to ban +temporarily or permanently any contributor for other behaviors that they +deem inappropriate, threatening, offensive, or harmful. + +Scope +----- + +This Code of Conduct applies both within project spaces and in public +spaces when an individual is representing the project or its community. +Examples of representing a project or community include using an +official project e-mail address, posting via an official social media +account, or acting as an appointed representative at an online or +offline event. Representation of a project may be further defined and +clarified by project maintainers. 
+ +Enforcement +----------- + +Instances of abusive, harassing, or otherwise unacceptable behavior may +be reported by contacting the project team at tchegini@uh.edu. All +complaints will be reviewed and investigated and will result in a +response that is deemed necessary and appropriate to the circumstances. +The project team is obligated to maintain confidentiality with regard to +the reporter of an incident. Further details of specific enforcement +policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in +good faith may face temporary or permanent repercussions as determined +by other members of the project's leadership. + +Attribution +----------- + +This Code of Conduct is adapted from the `Contributor +Covenant `__, version 1.4, +available at +https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 0000000..746fd68 --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,155 @@ +.. highlight:: shell + +============ +Contributing +============ + +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. + +You can contribute in many ways: + +Types of Contributions +---------------------- + +Report Bugs +~~~~~~~~~~~ + +Report bugs at https://github.com/hyriver/pygridmet/issues. + +Fix Bugs +~~~~~~~~ + +Look through the GitHub issues for bugs. Anything tagged with "bug" and "help +wanted" is open to whoever wants to implement it. + +Implement Features +~~~~~~~~~~~~~~~~~~ + +Other than new features that you might have in mind, you can look through +the GitHub issues for features. Anything tagged with "enhancement" +and "help wanted" is open to whoever wants to implement it. 
+ +Write Documentation +~~~~~~~~~~~~~~~~~~~ + +PyGridMet could always use more documentation, whether as part of the +official PyGridMet docs, in docstrings, or even on the web in blog posts, +articles, and such. + +Submit Feedback +~~~~~~~~~~~~~~~ + +The best way to send feedback is to file an issue at https://github.com/hyriver/pygridmet/issues. + +If you are proposing a feature: + +* Explain in detail how it would work. +* Keep the scope as narrow as possible, to make it easier to implement. +* Remember that this is a volunteer-driven project, and that contributions + are welcome :) + +Get Started! +------------ + +Ready to contribute? Here's how to set up pygridmet for local development. + +1. Fork the PyGridMet repo through the GitHub website. +2. Clone your fork locally and add the main ``pygridmet`` as the upstream remote: + +.. code-block:: console + + $ git clone git@github.com:your_name_here/pygridmet.git + $ git remote add upstream git@github.com:hyriver/pygridmet.git + +3. Install your local copy into a virtualenv. Assuming you have ``mamba`` installed, + this is how you can set up your fork for local development: + +.. code-block:: console + + $ cd pygridmet/ + $ mamba env create -f ci/requirements/environment-dev.yml + $ mamba activate pygridmet-dev + $ python -m pip install . --no-deps + +4. Create a branch for local development: + +.. code-block:: console + + $ git checkout -b bugfix-or-feature/name-of-your-bugfix-or-feature + $ git push + +5. Now you can make your changes locally, make sure to add a description of + the changes to ``HISTORY.rst`` file and add extra tests, if applicable, + to ``tests`` folder. Also, make sure to give yourself credit by adding + your name at the end of the item(s) that you add in the history like this + ``By `Taher Chegini `_``. Then, + fetch the latest updates from the remote and resolve any merge conflicts: + +.. code-block:: console + + $ git fetch upstream + $ git merge upstream/name-of-your-branch + +6. 
Then create a new environment for linting and another for testing: + +.. code-block:: console + + $ mamba create -n py11 python=3.11 nox tomli pre-commit codespell gdal + $ mamba activate py11 + $ nox -s pre-commit + $ nox -s type-check + + $ mamba create -n py38 python=3.8 nox tomli pre-commit codespell gdal + $ mamba activate py38 + $ nox -s tests + + Note that if Python 3.11 is already installed on your system, you can + skip creating the ``py11`` environment and just use your system's Python 3.11 + to run the linting and type-checking tests, like this: + +.. code-block:: console + + $ mamba create -n py38 python=3.8 nox tomli pre-commit codespell gdal + $ mamba activate py38 + $ nox + +7. If you are making breaking changes make sure to reflect them in + the documentation, ``README.rst``, and tests if necessary. + +8. Commit your changes and push your branch to GitHub. Start the commit message with + ``ENH:``, ``BUG:``, ``DOC:`` to indicate whether the commit is a new feature, + documentation related, or a bug fix. For example: + +.. code-block:: console + + $ git add . + $ git commit -m "ENH: A detailed description of your changes." + $ git push origin name-of-your-branch + +9. Submit a pull request through the GitHub website. + +Tips +---- + +To run a subset of tests: + +.. code-block:: console + + $ nox -s tests -- -n=1 -k "test_name1 or test_name2" + +Deploying +--------- + +A reminder for the maintainers on how to deploy. +Make sure all your changes are committed (including an entry in HISTORY.rst). +Then run: + +.. code-block:: console + + $ git tag -a vX.X.X -m "vX.X.X" + $ git push --follow-tags + +where ``X.X.X`` is the version number following the +`semantic versioning spec `__ i.e., MAJOR.MINOR.PATCH. +Then release the tag from Github and Github Actions will deploy it to PyPi. 
diff --git a/HISTORY.rst b/HISTORY.rst new file mode 100644 index 0000000..a6b522a --- /dev/null +++ b/HISTORY.rst @@ -0,0 +1,8 @@ +======= +History +======= + +0.16.0 (2023-XX-XX) +------------------- + +- Initial release on PyPI. diff --git a/LICENSE b/LICENSE index b16ccbf..2275ac5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,10 @@ +======= +License +======= + MIT License -Copyright (c) 2023 HyRiver +Copyright (c) 2020, Taher Chegini Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..5e6b727 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,11 @@ +include LICENSE +include README.rst + +graft docs + +prune docs/_build +prune docs/data +prune tests/data + +global-exclude .DS_Store +global-exclude *.py[cod] diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..4999027 --- /dev/null +++ b/README.rst @@ -0,0 +1,331 @@ +.. image:: https://raw.githubusercontent.com/hyriver/HyRiver-examples/main/notebooks/_static/pygeoutils_logo.png + :target: https://github.com/hyriver/HyRiver + +| + +.. image:: https://joss.theoj.org/papers/b0df2f6192f0a18b9e622a3edff52e77/status.svg + :target: https://joss.theoj.org/papers/b0df2f6192f0a18b9e622a3edff52e77 + :alt: JOSS + +| + +.. |pygeohydro| image:: https://github.com/hyriver/pygeohydro/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/pygeohydro/actions/workflows/test.yml + :alt: Github Actions + +.. |pygeoogc| image:: https://github.com/hyriver/pygeoogc/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/pygeoogc/actions/workflows/test.yml + :alt: Github Actions + +.. |pygeoutils| image:: https://github.com/hyriver/pygeoutils/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/pygeoutils/actions/workflows/test.yml + :alt: Github Actions + +.. 
|pynhd| image:: https://github.com/hyriver/pynhd/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/pynhd/actions/workflows/test.yml + :alt: Github Actions + +.. |py3dep| image:: https://github.com/hyriver/py3dep/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/py3dep/actions/workflows/test.yml + :alt: Github Actions + +.. |pydaymet| image:: https://github.com/hyriver/pydaymet/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/pydaymet/actions/workflows/test.yml + :alt: Github Actions + +.. |pygridmet| image:: https://github.com/hyriver/pygridmet/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/pygridmet/actions/workflows/test.yml + :alt: Github Actions + +.. |pynldas2| image:: https://github.com/hyriver/pynldas2/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/pynldas2/actions/workflows/test.yml + :alt: Github Actions + +.. |async| image:: https://github.com/hyriver/async-retriever/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/async-retriever/actions/workflows/test.yml + :alt: Github Actions + +.. 
|signatures| image:: https://github.com/hyriver/hydrosignatures/actions/workflows/test.yml/badge.svg + :target: https://github.com/hyriver/hydrosignatures/actions/workflows/test.yml + :alt: Github Actions + +================ ==================================================================== ============ +Package Description Status +================ ==================================================================== ============ +PyNHD_ Navigate and subset NHDPlus (MR and HR) using web services |pynhd| +Py3DEP_ Access topographic data through National Map's 3DEP web service |py3dep| +PyGeoHydro_ Access NWIS, NID, WQP, eHydro, NLCD, CAMELS, and SSEBop databases |pygeohydro| +PyDaymet_ Access daily, monthly, and annual climate data via Daymet |pydaymet| +PyGridMet_ Access daily climate data via GridMet |pygridmet| +PyNLDAS2_ Access hourly NLDAS-2 data via web services |pynldas2| +HydroSignatures_ A collection of tools for computing hydrological signatures |signatures| +AsyncRetriever_ High-level API for asynchronous requests with persistent caching |async| +PyGeoOGC_ Send queries to any ArcGIS RESTful-, WMS-, and WFS-based services |pygeoogc| +PyGeoUtils_ Utilities for manipulating geospatial, (Geo)JSON, and (Geo)TIFF data |pygeoutils| +================ ==================================================================== ============ + +.. _PyGeoHydro: https://github.com/hyriver/pygeohydro +.. _AsyncRetriever: https://github.com/hyriver/async-retriever +.. _PyGeoOGC: https://github.com/hyriver/pygeoogc +.. _PyGeoUtils: https://github.com/hyriver/pygeoutils +.. _PyNHD: https://github.com/hyriver/pynhd +.. _Py3DEP: https://github.com/hyriver/py3dep +.. _PyDaymet: https://github.com/hyriver/pydaymet +.. _PyGridMet: https://github.com/hyriver/pygridmet +.. _PyNLDAS2: https://github.com/hyriver/pynldas2 +.. _HydroSignatures: https://github.com/hyriver/hydrosignatures + +PyGridMet: Daily climate data through GridMet +--------------------------------------------- + +.. 
image:: https://img.shields.io/pypi/v/pygridmet.svg + :target: https://pypi.python.org/pypi/pygridmet + :alt: PyPi + +.. image:: https://img.shields.io/conda/vn/conda-forge/pygridmet.svg + :target: https://anaconda.org/conda-forge/pygridmet + :alt: Conda Version + +.. image:: https://codecov.io/gh/hyriver/pygridmet/branch/main/graph/badge.svg + :target: https://codecov.io/gh/hyriver/pygridmet + :alt: CodeCov + +.. image:: https://img.shields.io/pypi/pyversions/pygridmet.svg + :target: https://pypi.python.org/pypi/pygridmet + :alt: Python Versions + +.. image:: https://static.pepy.tech/badge/pygridmet + :target: https://pepy.tech/project/pygridmet + :alt: Downloads + +| + +.. image:: https://www.codefactor.io/repository/github/hyriver/pygridmet/badge + :target: https://www.codefactor.io/repository/github/hyriver/pygridmet + :alt: CodeFactor + +.. image:: https://img.shields.io/badge/code%20style-black-000000.svg + :target: https://github.com/psf/black + :alt: black + +.. image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white + :target: https://github.com/pre-commit/pre-commit + :alt: pre-commit + +.. image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/hyriver/HyRiver-examples/main?urlpath=lab/tree/notebooks + :alt: Binder + +| + +Features +-------- + +PyGridMet is a part of `HyRiver `__ software stack that +is designed to aid in hydroclimate analysis through web services. This package provides +access to daily climate data over contermonious US (CONUS) from +`GridMet `__ database using NetCDF +Subset Service (NCSS). Both single pixel (using ``get_bycoords`` function) and gridded data (using +``get_bygeom``) are supported which are returned as +``pandas.DataFrame`` and ``xarray.Dataset``, respectively. + +You can find some example notebooks `here `__. 
+ +Moreover, under the hood, PyGridMet uses +`PyGeoOGC `__ and +`AsyncRetriever `__ packages +for making requests in parallel and storing responses in chunks. This improves the +reliability and speed of data retrieval significantly. + +You can also try using PyGridMet without installing +it on your system by clicking on the binder badge. A Jupyter Lab +instance with the HyRiver stack pre-installed will be launched in your web browser, and you +can start coding! + +Moreover, requests for additional functionalities can be submitted via +`issue tracker `__. + +Citation +-------- +If you use any of HyRiver packages in your research, we appreciate citations: + +.. code-block:: bibtex + + @article{Chegini_2021, + author = {Chegini, Taher and Li, Hong-Yi and Leung, L. Ruby}, + doi = {10.21105/joss.03175}, + journal = {Journal of Open Source Software}, + month = {10}, + number = {66}, + pages = {1--3}, + title = {{HyRiver: Hydroclimate Data Retriever}}, + volume = {6}, + year = {2021} + } + +Installation +------------ + +You can install PyGridMet using ``pip`` as follows: + +.. code-block:: console + + $ pip install pygridmet + +Alternatively, PyGridMet can be installed from the ``conda-forge`` repository +using `Conda `__: + +.. code-block:: console + + $ conda install -c conda-forge pygridmet + +Quick start +----------- + +You can use PyGridMet using command-line or as a Python library. The commanda-line +provides access to two functionality: + +- Getting gridded climate data: You must create a ``geopandas.GeoDataFrame`` that contains + the geometries of the target locations. This dataframe must have four columns: + ``id``, ``start``, ``end``, ``geometry``. The ``id`` column is used as + filenames for saving the obtained climate data to a NetCDF (``.nc``) file. The ``start`` + and ``end`` columns are starting and ending dates of the target period. Then, + you must save the dataframe as a shapefile (``.shp``) or geopackage (``.gpkg``) with + CRS attribute. 
+- Getting single pixel climate data: You must create a CSV file that + contains coordinates of the target locations. This file must have at four columns: + ``id``, ``start``, ``end``, ``lon``, and ``lat``. The ``id`` column is used as filenames + for saving the obtained climate data to a CSV (``.csv``) file. The ``start`` and ``end`` + columns are the same as the ``geometry`` command. The ``lon`` and ``lat`` columns are + the longitude and latitude coordinates of the target locations. + +.. code-block:: console + + $ pygridmet -h + Usage: pygridmet [OPTIONS] COMMAND [ARGS]... + + Command-line interface for PyGridMet. + + Options: + -h, --help Show this message and exit. + + Commands: + coords Retrieve climate data for a list of coordinates. + geometry Retrieve climate data for a dataframe of geometries. + +The ``coords`` sub-command is as follows: + +.. code-block:: console + + $ pygridmet coords -h + Usage: pygridmet coords [OPTIONS] FPATH + + Retrieve climate data for a list of coordinates. + + FPATH: Path to a csv file with four columns: + - ``id``: Feature identifiers that gridmet uses as the output netcdf filenames. + - ``start``: Start time. + - ``end``: End time. + - ``lon``: Longitude of the points of interest. + - ``lat``: Latitude of the points of interest. + - ``snow``: (optional) Separate snowfall from precipitation, default is ``False``. + + Examples: + $ cat coords.csv + id,lon,lat,start,end + california,-122.2493328,37.8122894,2012-01-01,2014-12-31 + $ pygridmet coords coords.csv -v pr -v tmmn + + Options: + -v, --variables TEXT Target variables. You can pass this flag multiple + times for multiple variables. + -s, --save_dir PATH Path to a directory to save the requested files. + Extension for the outputs is .nc for geometry and .csv + for coords. + --disable_ssl Pass to disable SSL certification verification. + -h, --help Show this message and exit. + +And, the ``geometry`` sub-command is as follows: + +.. 
code-block:: console + + $ pygridmet geometry -h + Usage: pygridmet geometry [OPTIONS] FPATH + + Retrieve climate data for a dataframe of geometries. + + FPATH: Path to a shapefile (.shp) or geopackage (.gpkg) file. + This file must have four columns and contain a ``crs`` attribute: + - ``id``: Feature identifiers that gridmet uses as the output netcdf filenames. + - ``start``: Start time. + - ``end``: End time. + - ``geometry``: Target geometries. + - ``snow``: (optional) Separate snowfall from precipitation, default is ``False``. + + Examples: + $ pygridmet geometry geo.gpkg -v pr -v tmmn + + Options: + -v, --variables TEXT Target variables. You can pass this flag multiple + times for multiple variables. + -s, --save_dir PATH Path to a directory to save the requested files. + Extension for the outputs is .nc for geometry and .csv + for coords. + --disable_ssl Pass to disable SSL certification verification. + -h, --help Show this message and exit. + +Now, let's see how we can use PyGridMet as a library. + +PyGridMet offers two functions for getting climate data; ``get_bycoords`` and ``get_bygeom``. +The arguments of these functions are identical except the first argument where the latter +should be polygon and the former should be a coordinate (a tuple of length two as in (x, y)). +The input geometry or coordinate can be in any valid CRS (defaults to ``EPSG:4326``). The +``dates`` argument can be either a tuple of length two like ``(start_str, end_str)`` or a list of +years like ``[2000, 2005]``. It is noted that both functions have a ``snow`` flag for separating +snow from precipitation using +`Martinez and Gupta (2010) `__ method. + +.. code-block:: python + + from pynhd import NLDI + import pygridmet as gridmet + + geometry = NLDI().get_basins("01031500").geometry[0] + + var = ["prcp", "tmin"] + dates = ("2000-01-01", "2000-06-30") + + daily = gridmet.get_bygeom(geometry, dates, variables=var, snow=True) + +.. 
image:: https://raw.githubusercontent.com/hyriver/HyRiver-examples/main/notebooks/_static/gridmet_grid.png + :target: https://github.com/hyriver/HyRiver-examples/blob/main/notebooks/gridmet.ipynb + +If the input geometry (or coordinate) is in a CRS other than ``EPSG:4326``, we should pass +it to the functions. + +.. code-block:: python + + coords = (-1431147.7928, 318483.4618) + crs = 3542 + dates = ("2000-01-01", "2006-12-31") + data = gridmet.get_bycoords(coords, dates, variables=var, loc_crs=crs) + +.. image:: https://raw.githubusercontent.com/hyriver/HyRiver-examples/main/notebooks/_static/gridmet_loc.png + :target: https://github.com/hyriver/HyRiver-examples/blob/main/notebooks/gridmet.ipynb + +Additionally, the ``get_bycoords`` function accepts a list of coordinates and by setting the +``to_xarray`` flag to ``True`` it can return the results as a ``xarray.Dataset`` instead of +a ``pandas.DataFrame``: + +.. code-block:: python + + coords = [(-94.986, 29.973), (-95.478, 30.134)] + idx = ["P1", "P2"] + clm_ds = gridmet.get_bycoords(coords, range(2000, 2021), coords_id=idx, to_xarray=True) + +Contributing +------------ + +Contributions are very welcomed. Please read +`CONTRIBUTING.rst `__ +file for instructions. 
diff --git a/ci/requirements/environment-dev.yml b/ci/requirements/environment-dev.yml new file mode 100644 index 0000000..51dfc15 --- /dev/null +++ b/ci/requirements/environment-dev.yml @@ -0,0 +1,167 @@ +name: hyriver-dev +channels: +- conda-forge +- nodefaults +dependencies: +- python>=3.10 + +# async-retriever deps +- aiodns +- aiohttp-client-cache >=0.8.1 +- brotli +- cytoolz +- nest-asyncio +- ujson + +# pygeoogc deps +# - async-retriever>=0.15,<0.16 +- cytoolz +- defusedxml +- joblib +- multidict +- owslib>=0.27.2 +- pyproj>=3.0.1 +- requests +- requests-cache>=0.9.6 +- shapely>=2.0.0 +- ujson +- url-normalize>=1.4 +- urllib3 +- yarl + +# pygeoutils deps +- cytoolz +- geopandas-base >=0.7 +- fiona +- netcdf4 +- numpy >=1.21 +- pyproj >=2.2 +- rasterio >=1.2 +- rioxarray >=0.11 +- scipy +- shapely >=2.0 +- ujson +- xarray >=2023.01.0 + +# hydrosignatures deps +- numpy +- pandas +- scipy +- xarray +# optional deps +- numba + +# py3dep +# - async-retriever >=0.3.6 +- cytoolz +- numpy >=1.21 +# - pygeoogc >=0.13.7 +# - pygeoutils >=0.13.7 +- rasterio >=1.2 +- rioxarray >=0.11 +- scipy +- shapely >=2.0 +- xarray >=2023.01.0 +# optional dep +- pyflwdir >=0.5.6 + +# pynhd deps +# - async-retriever >=0.3.6 +- cytoolz +- geopandas-base >=0.9 +- networkx +- numpy >=1.21 +- pandas >=1.0 +- pyarrow >=1.0.1 +# - pygeoogc >=0.13.7 +# - pygeoutils >=0.13.7 +- shapely >=2.0 +# optional deps +- pyogrio +- py7zr + +# pydaymet deps +# - async-retriever >=0.3.6 +- lxml +- numpy >=1.21 +- pandas >=1.0 +# - py3dep >=0.13.7 +# - pygeoogc >=0.13.7 +# - pygeoutils >=0.13.9 +- rasterio >=1.2 +- scipy +- shapely >=2.0 +- xarray >=2023.01.0 +# optional deps +- numba + +# pygeohydro deps +- cytoolz +- defusedxml +- folium +- geopandas-base >=0.7 +- h5netcdf +# - hydrosignatures >=0.1.1 +- lxml +- matplotlib-base >=3.5 +- numpy >=1.21 +- pandas >=1.0 +# - pygeoogc >=0.13.7 +# - pygeoutils >=0.13.9 +# - pynhd >=0.13.7 +- rasterio >=1.2 +- rioxarray >=0.11.0 +- scipy +- shapely >=2.0 +- 
xarray >=2023.01.0 +# optional deps +- planetary-computer +- pystac-client + +# pynldas2 +# - async-retriever >=0.3.6 +- h5netcdf +- numpy >=1.21 +- pandas >=1.0 +# - pygeoutils >=0.13.10 +- pyproj >=2.2 +- rioxarray >=0.11 +- xarray >=2023.01.0 + +# optional deps for speeding up some operations +- bottleneck + +# Other deps required by example notebooks +- mapclassify +- contextily +- hvplot +- osmnx +- tqdm +- pytables +- ffmpeg +- xarray-spatial +- datashader + +# dev deps +- ipywidgets +- ipykernel +- codespell +- tomli +- nox +- pre-commit +- psutil +- pytest-cov +- pytest-xdist +- pytest-sugar + +- pip +- pip: + - git+https://github.com/hyriver/async-retriever.git + - git+https://github.com/hyriver/hydrosignatures.git + - git+https://github.com/hyriver/pygeoogc.git + - git+https://github.com/hyriver/pygeoutils.git + - git+https://github.com/hyriver/pynhd.git + - git+https://github.com/hyriver/py3dep.git + - git+https://github.com/hyriver/pydaymet.git + - git+https://github.com/hyriver/pynldas2.git + - git+https://github.com/hyriver/pygeohydro.git diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml new file mode 100644 index 0000000..f302ad5 --- /dev/null +++ b/ci/requirements/environment.yml @@ -0,0 +1,89 @@ +name: pydaymet-tests +channels: +- conda-forge +- nodefaults +dependencies: + # async-retriever deps +- aiodns +- aiosqlite +- aiohttp >=3.8.3 +- brotli +- cytoolz +- nest-asyncio +- aiohttp-client-cache >=0.8.1 +- ujson +- cytoolz + + # pygeoogc deps + # - async-retriever >=0.3.5 +- multidict +- url-normalize>=1.4 +- yarl +- cytoolz +- defusedxml +- owslib >=0.27.2 +- pyproj >=2.2 +- requests +- requests-cache >=0.9.6 +- shapely >=2.0 +- urllib3 + + # pygeoutils deps +- cytoolz +- geopandas-base >=0.7 +- netcdf4 +- numpy >=1.17 +- pyproj >=2.2 +- rasterio >=1.2 +- rioxarray >=0.11 +- scipy +- shapely >=2.0 +- ujson +- xarray >=2023.01.0 + + # py3dep + # - async-retriever >=0.3.6 +- click >=0.7 +- cytoolz +- numpy >=1.17 + # - 
pygeoogc >=0.13.7 + # - pygeoutils >=0.13.7 +- rasterio >=1.2 +- rioxarray >=0.11 +- scipy +- shapely >=2.0 +- xarray >=2023.01.0 + # optional dep +- pyflwdir + + # pydaymet deps + # - async-retriever >=0.3.6 +- click >=0.7 + +- lxml +- numpy >=1.17 +- pandas >=1.0 + # - py3dep >=0.13.7 + # - pygeoogc >=0.13.7 + # - pygeoutils >=0.13.9 +- rasterio >=1.2 +- scipy +- shapely >=2.0 +- xarray >=2023.01.0 + + # optional deps to speed up xarray and pydaymet +- bottleneck +- numba + + # test deps +- pyarrow>=1.0.1 +- psutil +- pytest-cov +- pytest-xdist + +- pip +- pip: + - git+https://github.com/hyriver/async-retriever.git + - git+https://github.com/hyriver/pygeoogc.git + - git+https://github.com/hyriver/pygeoutils.git + - git+https://github.com/hyriver/py3dep.git diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..db3dd2f --- /dev/null +++ b/conftest.py @@ -0,0 +1,18 @@ +"""Configuration for pytest.""" + +import pytest +from click.testing import CliRunner + + +@pytest.fixture() +def runner(): + """Return a CliRunner.""" + return CliRunner() + + +@pytest.fixture(autouse=True) +def _add_standard_imports(doctest_namespace): + """Add pygridmet namespace for doctest.""" + import pygridmet as gridmet + + doctest_namespace["gridmet"] = gridmet diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 0000000..c7c76d1 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,126 @@ +"""Nox sessions.""" +from __future__ import annotations + +import shutil +from pathlib import Path + +import nox + +try: + import tomllib as tomli +except ImportError: + import tomli + + +def get_package_name() -> str: + """Get the name of the package.""" + with Path("pyproject.toml").open("rb") as f: + return tomli.load(f)["project"]["name"] + + +def get_extras() -> list[str]: + """Get the name of the package.""" + with Path("pyproject.toml").open("rb") as f: + extras = tomli.load(f)["project"]["optional-dependencies"] + return [e for e in extras if e not in ("test", "typeguard")] + + 
+def get_deps() -> list[str]: + """Get the name of the package.""" + with Path("pyproject.toml").open("rb") as f: + return tomli.load(f)["project"]["dependencies"] + + +python_versions = ["3.8"] +lint_versions = ["3.11"] +package = get_package_name() +gh_deps = { + "async-retriever": [], + "hydrosignatures": [], + "pygeoogc": ["async-retriever"], + "pygeoutils": ["async-retriever", "pygeoogc"], + "pynhd": ["async-retriever", "pygeoogc", "pygeoutils"], + "py3dep": ["async-retriever", "pygeoogc", "pygeoutils"], + "pygeohydro": ["async-retriever", "pygeoogc", "pygeoutils", "pynhd", "hydrosignatures"], + "pydaymet": ["async-retriever", "pygeoogc", "pygeoutils", "py3dep"], + "pygridmet": ["async-retriever", "pygeoogc", "pygeoutils"], + "pynldas2": ["async-retriever", "pygeoutils"], +} +nox.options.sessions = ( + "pre-commit", + "type-check", + "tests", +) + + +def install_deps( + session: nox.Session, extra: str | None = None, version_limit: list[str] | None = None +) -> None: + """Install package dependencies.""" + deps = [f".[{extra}]"] if extra else ["."] + deps += [f"git+https://github.com/hyriver/{p}.git" for p in gh_deps[package]] + if version_limit: + deps += list(version_limit) + session.install(*deps) + dirs = [".pytest_cache", "build", "dist", ".eggs"] + for d in dirs: + shutil.rmtree(d, ignore_errors=True) + + patterns = ["*.egg-info", "*.egg", "*.pyc", "*~", "**/__pycache__"] + for p in patterns: + for f in Path.cwd().rglob(p): + shutil.rmtree(f, ignore_errors=True) + + +@nox.session(name="pre-commit", python=lint_versions) +def pre_commit(session: nox.Session) -> None: + """Lint using pre-commit.""" + session.install("pre-commit") + session.run( + "pre-commit", + "run", + "--all-files", + "--hook-stage=manual", + *session.posargs, + ) + + +@nox.session(name="type-check", python=python_versions) +def type_check(session: nox.Session) -> None: + """Run Pyright.""" + extras = get_extras() + install_deps(session, ",".join(extras)) + session.install("pyright") + 
session.run("pyright") + + +@nox.session(python=python_versions) +def tests(session: nox.Session) -> None: + """Run the test suite.""" + extras = get_extras() + try: + extras.remove("speedup") + speedup = True + except ValueError: + speedup = False + + install_deps(session, ",".join(["test", *extras])) + session.run("pytest", "--doctest-modules", *session.posargs) + session.notify("cover") + if speedup: + session.notify("speedup") + + +@nox.session(python=python_versions) +def speedup(session: nox.Session) -> None: + extras = get_extras() + install_deps(session, ",".join(["test", *extras])) + session.run("pytest", "--doctest-modules", "-m", "speedup", *session.posargs) + + +@nox.session +def cover(session: nox.Session) -> None: + """Coverage analysis.""" + session.install("coverage[toml]") + session.run("coverage", "report") + session.run("coverage", "erase") diff --git a/pygridmet/__init__.py b/pygridmet/__init__.py new file mode 100644 index 0000000..1df05a1 --- /dev/null +++ b/pygridmet/__init__.py @@ -0,0 +1,31 @@ +"""Top-level package for PyGridMet.""" +from importlib.metadata import PackageNotFoundError, version + +from pygridmet.core import GridMet +from pygridmet.exceptions import ( + InputRangeError, + InputTypeError, + InputValueError, + MissingCRSError, + MissingItemError, +) +from pygridmet.print_versions import show_versions +from pygridmet.pygridmet import get_bycoords, get_bygeom + +try: + __version__ = version("pygridmet") +except PackageNotFoundError: + __version__ = "999" + +__all__ = [ + "GridMet", + "get_bycoords", + "get_bygeom", + "potential_et", + "show_versions", + "InputRangeError", + "InputTypeError", + "InputValueError", + "MissingItemError", + "MissingCRSError", +] diff --git a/pygridmet/cli.py b/pygridmet/cli.py new file mode 100644 index 0000000..ad6a3dd --- /dev/null +++ b/pygridmet/cli.py @@ -0,0 +1,207 @@ +"""Command-line interface for PyGridMet.""" +from __future__ import annotations + +from pathlib import Path +from typing import 
TYPE_CHECKING, TypeVar + +import click +import geopandas as gpd +import pandas as pd + +from pygridmet import pygridmet as gridmet +from pygridmet.exceptions import ( + InputTypeError, + InputValueError, + MissingCRSError, + MissingItemError, +) + +if TYPE_CHECKING: + DFType = TypeVar("DFType", pd.DataFrame, gpd.GeoDataFrame) + + +def parse_snow(target_df: pd.DataFrame) -> pd.DataFrame: + """Parse the snow dataframe.""" + if target_df["snow"].dtype != bool: + target_df["snow"] = target_df.snow.str.lower().str.strip() + if not target_df.snow.str.contains("true|false").all(): + raise InputValueError("snow", "true or false") + target_df["snow"] = target_df.snow == "true" + return target_df + + +def get_target_df(tdf: DFType, req_cols: list[str]) -> DFType: + """Check if all required columns exists in the dataframe. + + It also re-orders the columns based on req_cols order. + """ + missing = [c for c in req_cols if c not in tdf] + if missing: + raise MissingItemError(missing) + return tdf[req_cols] # pyright: ignore[reportGeneralTypeIssues] + + +def get_required_cols(geom_type: str, columns: pd.Index) -> list[str]: + """Get the required columns for a given geometry type.""" + req_cols = ["id", geom_type, "dates"] + return req_cols + list({"snow"}.intersection(columns)) + + +variables_opt = click.option( + "--variables", + "-v", + multiple=True, + default=["prcp"], + help="Target variables. You can pass this flag multiple times for multiple variables.", +) + +save_dir_opt = click.option( + "-s", + "--save_dir", + type=click.Path(exists=False), + default="clm_gridmet", + help=" ".join( + ( + "Path to a directory to save the requested files.", + "Extension for the outputs is .nc for geometry and .csv for coords.", + ) + ), +) + +ssl_opt = click.option( + "--disable_ssl", is_flag=True, help="Pass to disable SSL certification verification." 
+) + +CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]} + + +@click.group(context_settings=CONTEXT_SETTINGS) +def cli() -> None: + """Command-line interface for Pygridmet.""" + + +@cli.command("coords", context_settings=CONTEXT_SETTINGS) +@click.argument("fpath", type=click.Path(exists=True)) +@variables_opt +@save_dir_opt +@ssl_opt +def coords( + fpath: Path, + variables: list[str] | str | None = None, + save_dir: str | Path = "clm_gridmet", + disable_ssl: bool = False, +) -> None: + """Retrieve climate data for a list of coordinates. + + \b + FPATH: Path to a csv file with four columns: + - ``id``: Feature identifiers that gridmet uses as the output netcdf filenames. + - ``start``: Start time. + - ``end``: End time. + - ``lon``: Longitude of the points of interest. + - ``lat``: Latitude of the points of interest. + - ``snow``: (optional) Separate snowfall from precipitation, default is ``False``. + + \b + Examples: + $ cat coords.csv + id,lon,lat,start,end + california,-122.2493328,37.8122894,2012-01-01,2014-12-31 + $ pygridmet coords coords.csv -v prcp -v tmin + """ # noqa: D301 + fpath = Path(fpath) + if fpath.suffix != ".csv": + raise InputTypeError("file", ".csv") + + target_df = get_target_df(pd.read_csv(fpath), ["id", "start", "end", "lon", "lat"]) + target_df["dates"] = list(target_df[["start", "end"]].itertuples(index=False, name=None)) + target_df["coords"] = list(target_df[["lon", "lat"]].itertuples(index=False, name=None)) + if "snow" in target_df: + target_df = parse_snow(target_df) + + req_cols = get_required_cols("coords", target_df.columns) + target_df = target_df[req_cols] + + count = "1 point" if len(target_df) == 1 else f"{len(target_df)} points" + click.echo(f"Found coordinates of {count} in {fpath.resolve()}.") + + Path(save_dir).mkdir(parents=True, exist_ok=True) + with click.progressbar( + target_df.itertuples(index=False, name=None), + label="Getting single-pixel climate data", + length=len(target_df), + ) as bar: + for i, *args in 
bar: + fname = Path(save_dir, f"{i}.csv") + if fname.exists(): + continue + kwrgs = dict(zip(req_cols[1:], args)) + clm = gridmet.get_bycoords(**kwrgs, variables=variables, ssl=not disable_ssl) + clm.to_csv(fname, index=False) + click.echo("Done.") + + +@cli.command("geometry", context_settings=CONTEXT_SETTINGS) +@click.argument("fpath", type=click.Path(exists=True)) +@variables_opt +@save_dir_opt +@ssl_opt +def geometry( + fpath: Path, + variables: list[str] | str | None = None, + save_dir: str | Path = "clm_gridmet", + disable_ssl: bool = False, +) -> None: + """Retrieve climate data for a dataframe of geometries. + + \b + FPATH: Path to a shapefile (.shp) or geopackage (.gpkg) file. + This file must have four columns and contain a ``crs`` attribute: + - ``id``: Feature identifiers that gridmet uses as the output netcdf filenames. + - ``start``: Start time. + - ``end``: End time. + - ``geometry``: Target geometries. + - ``snow``: (optional) Separate snowfall from precipitation, default is ``False``. 
+ + \b + Examples: + $ pygridmet geometry geo.gpkg -v prcp -v tmin + """ # noqa: D301 + fpath = Path(fpath) + if fpath.suffix not in (".shp", ".gpkg"): + raise InputTypeError("file", ".shp or .gpkg") + + target_df = gpd.read_file(fpath) + if target_df.crs is None: + raise MissingCRSError + + if "undefined geographic" in target_df.crs.name.lower(): + raise MissingCRSError + + target_df = get_target_df(target_df, ["id", "start", "end", "geometry"]) + target_df["dates"] = list(target_df[["start", "end"]].itertuples(index=False, name=None)) + req_cols = get_required_cols("geometry", target_df.columns) + target_df = target_df[req_cols] + + count = "1 geometry" if len(target_df) == 1 else f"{len(target_df)} geometries" + click.echo(f"Found {count} in {fpath.resolve()}.") + + Path(save_dir).mkdir(parents=True, exist_ok=True) + with click.progressbar( + target_df.itertuples(index=False, name=None), + label="Getting gridded climate data", + length=len(target_df), + ) as bar: + for i, *args in bar: + fname = Path(save_dir, f"{i}.nc") + if fname.exists(): + continue + kwrgs = dict(zip(req_cols[1:], args)) + clm = gridmet.get_bygeom( + **kwrgs, + crs=target_df.crs, + variables=variables, + ssl=not disable_ssl, + ) + clm.to_netcdf(fname) + click.echo("Done.") diff --git a/pygridmet/core.py b/pygridmet/core.py new file mode 100644 index 0000000..e19db52 --- /dev/null +++ b/pygridmet/core.py @@ -0,0 +1,417 @@ +"""Core class for the GridMet functions.""" +# pyright: reportGeneralTypeIssues=false +from __future__ import annotations + +import functools +import warnings +from dataclasses import dataclass +from datetime import datetime +from typing import TYPE_CHECKING, Any, Callable, Iterable, TypeVar + +import numpy as np +import numpy.typing as npt +import pandas as pd +import xarray as xr + +from pygridmet.exceptions import InputRangeError, InputTypeError, InputValueError + +try: + from numba import config as numba_config + from numba import jit, prange + + ngjit = 
functools.partial(jit, nopython=True, cache=True, nogil=True) + numba_config.THREADING_LAYER = "workqueue" + has_numba = True +except ImportError: + has_numba = False + prange = range + + T = TypeVar("T") + Func = Callable[..., T] + + def ngjit( + signature_or_function: str | Func[T], parallel: bool = False + ) -> Callable[[Func[T]], Func[T]]: + def decorator_njit(func: Func[T]) -> Func[T]: + @functools.wraps(func) + def wrapper_decorator(*args: tuple[Any, ...], **kwargs: dict[str, Any]) -> T: + return func(*args, **kwargs) + + return wrapper_decorator + + return decorator_njit + + +if TYPE_CHECKING: + DF = TypeVar("DF", pd.DataFrame, xr.Dataset) + +DATE_FMT = "%Y-%m-%d" +# Default snow params from https://doi.org/10.5194/gmd-11-1077-2018 +T_RAIN = 2.5 # degC +T_SNOW = 0.6 # degC + +__all__ = ["GridMet"] + + +@dataclass +class GridMetBase: + """Base class for validating GridMet requests. + + Parameters + ---------- + snow : bool, optional + Compute snowfall from precipitation and minimum temperature. Defaults to ``False``. + variables : list, optional + List of variables to be downloaded. The acceptable variables are: + ``pr``, ``rmax``, ``rmin``, ``sph``, ``srad``, ``th``, ``tmmn``, ``tmmx``, ``vs``, + ``bi``, ``fm100``, ``fm1000``, ``erc``, ``etr``, ``pet``, and ``vpd``. + Descriptions can be found `here `__. + Defaults to ``None``, i.e., all the variables are downloaded. + + References + ---------- + .. 
footbibliography:: + """ + + snow: bool + variables: Iterable[str] + + def __post_init__(self) -> None: + valid_variables = ( + "pr", + "rmax", + "rmin", + "sph", + "srad", + "th", + "tmmn", + "tmmx", + "vs", + "bi", + "fm100", + "fm1000", + "erc", + "etr", + "pet", + "vpd", + ) + if "all" in self.variables: + self.variables = valid_variables + + if not set(self.variables).issubset(set(valid_variables)): + raise InputValueError("variables", valid_variables) + + if self.snow: + self.variables = list(set(self.variables).union({"tmmn"})) + + +@ngjit("f8[::1](f8[::1], f8[::1], f8, f8)", parallel=True) +def _separate_snow( + pr: npt.NDArray[np.float64], + tmmn: npt.NDArray[np.float64], + t_rain: np.float64, + t_snow: np.float64, +) -> npt.NDArray[np.float64]: + """Separate snow in precipitation.""" + t_rng = t_rain - t_snow + snow = np.zeros_like(pr) + + for t in prange(pr.shape[0]): + if tmmn[t] > t_rain: + snow[t] = 0.0 + elif tmmn[t] < t_snow: + snow[t] = pr[t] + else: + snow[t] = pr[t] * (t_rain - tmmn[t]) / t_rng + return snow + + +class GridMet: + """Base class for GridMet requests. + + Parameters + ---------- + variables : str or list or tuple, optional + List of variables to be downloaded. The acceptable variables are: + ``pr``, ``rmax``, ``rmin``, ``sph``, ``srad``, ``th``, ``tmmn``, ``tmmx``, ``vs``, + ``bi``, ``fm100``, ``fm1000``, ``erc``, ``etr``, ``pet``, and ``vpd``. + Descriptions can be found `here `__. + Defaults to ``None``, i.e., all the variables are downloaded. + snow : bool, optional + Compute snowfall from precipitation and minimum temperature. Defaults to ``False``. + + References + ---------- + .. 
footbibliography:: + """ + + def __init__( + self, + variables: Iterable[str] | str | None = None, + snow: bool = False, + ) -> None: + _variables = ["all"] if variables is None else variables + _variables = [_variables] if isinstance(_variables, str) else _variables + validated = GridMetBase(variables=_variables, snow=snow) + self.variables = validated.variables + self.snow = validated.snow + + self.bounds = (-124.7666, 25.0666, -67.0583, 49.4000) + self.valid_start = pd.to_datetime("1980-01-01") + self.valid_end = datetime.now() - pd.DateOffset(days=1) + self.missing_value = 32767.0 + + self.gridmet_table = pd.DataFrame( + { + "variable": [ + "Precipitation", + "Maximum Relative Humidity", + "Minimum Relative Humidity", + "Specific Humidity", + "Surface Radiation", + "Wind Direction", + "Minimum Air Temperature", + "Maximum Air Temperature", + "Wind Speed", + "Burning Index", + "Fuel Moisture (100-hr)", + "Fuel Moisture (1000-hr)", + "Energy Release Component", + "Reference Evapotranspiration (Alfalfa)", + "Reference Evapotranspiration (Grass)", + "Vapor Pressure Deficit", + ], + "abbr": [ + "pr", + "rmax", + "rmin", + "sph", + "srad", + "th", + "tmmn", + "tmmx", + "vs", + "bi", + "fm100", + "fm1000", + "erc", + "etr", + "pet", + "vpd", + ], + "long_name": [ + "precipitation_amount", + "daily_maximum_relative_humidity", + "daily_minimum_relative_humidity", + "daily_mean_specific_humidity", + "daily_mean_shortwave_radiation_at_surface", + "daily_mean_wind_direction", + "daily_minimum_temperature", + "daily_maximum_temperature", + "daily_mean_wind_speed", + "daily_mean_burning_index_g", + "dead_fuel_moisture_100hr", + "dead_fuel_moisture_1000hr", + "daily_mean_energy_release_component-g", + "daily_mean_reference_evapotranspiration_alfalfa", + "daily_mean_reference_evapotranspiration_grass", + "daily_mean_vapor_pressure_deficit", + ], + "units": [ + "mm", + "%", + "%", + "kg/kg", + "W/m2", + "Klockwise from north", + "K", + "K", + "m/s", + "-", + "%", + "%", + "-", 
+ "mm", + "mm", + "kPa", + ], + } + ) + + self.units = dict(zip(self.gridmet_table["abbr"], self.gridmet_table["units"])) + self.units["snow"] = "mm" + + self.long_names = dict(zip(self.gridmet_table["abbr"], self.gridmet_table["long_name"])) + self.long_names["snow"] = "snow_amount" + + @staticmethod + def check_dates(dates: tuple[str, str] | int | list[int]) -> None: + """Check if input dates are in correct format and valid.""" + if not isinstance(dates, (tuple, list, int, range)): + raise InputTypeError( + "dates", + "tuple, list, range, or int", + "(start, end), range(start, end), or [years, ...]", + ) + + if isinstance(dates, tuple) and len(dates) != 2: + raise InputTypeError("dates", "Start and end should be passed as a tuple of length 2.") + + def dates_todict(self, dates: tuple[str, str]) -> dict[str, str]: + """Set dates by start and end dates as a tuple, (start, end).""" + if not isinstance(dates, tuple) or len(dates) != 2: + raise InputTypeError("dates", "tuple", "(start, end)") + + start = pd.to_datetime(dates[0]) + end = pd.to_datetime(dates[1]) + + if start < self.valid_start or end > self.valid_end: + raise InputRangeError("start/end", f"from {self.valid_start} to {self.valid_end}") + + return { + "start": start.strftime(DATE_FMT), + "end": end.strftime(DATE_FMT), + } + + def years_todict(self, years: list[int] | int | range) -> dict[str, str]: + """Set date by list of year(s).""" + years = [years] if isinstance(years, int) else list(years) + + if min(years) < self.valid_start.year or max(years) > self.valid_end.year: + raise InputRangeError( + "start/end", f"from {self.valid_start.year} to {self.valid_end.year}" + ) + + return {"years": ",".join(str(y) for y in years)} + + def dates_tolist(self, dates: tuple[str, str]) -> list[tuple[pd.Timestamp, pd.Timestamp]]: + """Correct dates for GridMet accounting for leap years. + + GridMet doesn't account for leap years and removes Dec 31 when + it's leap year. 
+ + Parameters + ---------- + dates : tuple + Target start and end dates. + + Returns + ------- + list + All the dates in the GridMet database within the provided date range. + """ + date_dict = self.dates_todict(dates) + start = pd.to_datetime(date_dict["start"]) + end = pd.to_datetime(date_dict["end"]) + + period = pd.date_range(start, end) + nl = period[~period.is_leap_year] + lp = period[(period.is_leap_year) & (~period.strftime(DATE_FMT).str.endswith("12-31"))] + _period = period[(period.isin(nl)) | (period.isin(lp))] + years = [_period[_period.year == y] for y in _period.year.unique()] + return [(y[0], y[-1]) for y in years] + + def years_tolist(self, years: list[int] | int) -> list[tuple[pd.Timestamp, pd.Timestamp]]: + """Correct dates for GridMet accounting for leap years. + + GridMet doesn't account for leap years and removes Dec 31 when + it's leap year. + + Parameters + ---------- + years: list + A list of target years. + + Returns + ------- + list + All the dates in the GridMet database within the provided date range. 
+ """ + date_dict = self.years_todict(years) + start_list, end_list = [], [] + for year in date_dict["years"].split(","): + s = pd.to_datetime(f"{year}0101") + start_list.append(s) + e = pd.to_datetime(f"{year}1230") if s.is_leap_year else pd.to_datetime(f"{year}1231") + end_list.append(e) + return list(zip(start_list, end_list)) + + @staticmethod + def _snow_point(climate: pd.DataFrame, t_rain: float, t_snow: float) -> pd.DataFrame: + """Separate snow from precipitation.""" + clm = climate.copy() + clm["snow (mm)"] = _separate_snow( + clm["pr (mm)"].to_numpy("f8"), + clm["tmmn (K)"].to_numpy("f8"), + np.float64(t_rain), + np.float64(t_snow), + ) + return clm + + @staticmethod + def _snow_gridded(climate: xr.Dataset, t_rain: float, t_snow: float) -> xr.Dataset: + """Separate snow from precipitation.""" + clm = climate.copy() + + def snow_func( + pr: npt.NDArray[np.float64], + tmmn: npt.NDArray[np.float64], + t_rain: float, + t_snow: float, + ) -> npt.NDArray[np.float64]: + """Separate snow based on Martinez and Gupta (2010).""" + return _separate_snow( + pr.astype("f8"), + tmmn.astype("f8") - 273.15, + np.float64(t_rain), + np.float64(t_snow), + ) + + clm["snow"] = xr.apply_ufunc( + snow_func, + clm["pr"], + clm["tmmn"] - 273.15, + t_rain, + t_snow, + input_core_dims=[["time"], ["time"], [], []], + output_core_dims=[["time"]], + vectorize=True, + output_dtypes=[clm["pr"].dtype], + ).transpose("time", "lat", "lon") + clm["snow"].attrs["units"] = "mm" + clm["snow"].attrs["long_name"] = "daily snowfall" + return clm + + def separate_snow(self, clm: DF, t_rain: float = T_RAIN, t_snow: float = T_SNOW) -> DF: + """Separate snow based on :footcite:t:`Martinez_2010`. + + Parameters + ---------- + clm : pandas.DataFrame or xarray.Dataset + Climate data that should include ``pr`` and ``tmmn``. + t_rain : float, optional + Threshold for temperature for considering rain, defaults to 2.5 K. 
+ t_snow : float, optional + Threshold for temperature for considering snow, defaults to 0.6 K. + + Returns + ------- + pandas.DataFrame or xarray.Dataset + Input data with ``snow (mm)`` column if input is a ``pandas.DataFrame``, + or ``snow`` variable if input is an ``xarray.Dataset``. + + References + ---------- + .. footbibliography:: + """ + if not has_numba: + warnings.warn( + "Numba not installed. Using slow pure python version.", UserWarning, stacklevel=2 + ) + + if not isinstance(clm, (pd.DataFrame, xr.Dataset)): + raise InputTypeError("clm", "pandas.DataFrame or xarray.Dataset") + + if isinstance(clm, xr.Dataset): + return self._snow_gridded(clm, t_rain, t_snow) + return self._snow_point(clm, t_rain, t_snow) diff --git a/pygridmet/exceptions.py b/pygridmet/exceptions.py new file mode 100644 index 0000000..43dab48 --- /dev/null +++ b/pygridmet/exceptions.py @@ -0,0 +1,104 @@ +"""Customized PyDaymet exceptions.""" +from __future__ import annotations + +from typing import Generator, Sequence + + +class InputValueError(Exception): + """Exception raised for invalid input. + + Parameters + ---------- + inp : str + Name of the input parameter + valid_inputs : tuple + List of valid inputs + given : str, optional + The given input, defaults to None. + """ + + def __init__( + self, + inp: str, + valid_inputs: Sequence[str | int] | Generator[str | int, None, None], + given: str | int | None = None, + ) -> None: + if given is None: + self.message = f"Given {inp} is invalid. Valid options are:\n" + else: + self.message = f"Given {inp} ({given}) is invalid. Valid options are:\n" + self.message += "\n".join(str(i) for i in valid_inputs) + super().__init__(self.message) + + def __str__(self) -> str: + return self.message + + +class InputTypeError(Exception): + """Exception raised when a function argument type is invalid. 
+ + Parameters + ---------- + arg : str + Name of the function argument + valid_type : str + The valid type of the argument + example : str, optional + An example of a valid form of the argument, defaults to None. + """ + + def __init__(self, arg: str, valid_type: str, example: str | None = None) -> None: + self.message = f"The {arg} argument should be of type {valid_type}" + if example is not None: + self.message += f":\n{example}" + super().__init__(self.message) + + def __str__(self) -> str: + return self.message + + +class InputRangeError(Exception): + """Exception raised when a function argument is not in the valid range. + + Parameters + ---------- + variable : str + Variable with invalid value + valid_range : str + Valid range + """ + + def __init__(self, variable: str, valid_range: str) -> None: + self.message = f"Valid range for {variable} is {valid_range}." + super().__init__(self.message) + + def __str__(self) -> str: + return self.message + + +class MissingItemError(Exception): + """Exception raised when a required item is missing. + + Parameters + ---------- + missing : list + A list of missing items. + """ + + def __init__(self, missing: list[str]) -> None: + self.message = "The following items are missing:\n" + f"{', '.join(missing)}" + super().__init__(self.message) + + def __str__(self) -> str: + return self.message + + +class MissingCRSError(Exception): + """Exception raised when input GeoDataFrame is missing CRS.""" + + def __init__(self) -> None: + self.message = "The input GeoDataFrame is missing CRS." + super().__init__(self.message) + + def __str__(self) -> str: + return self.message diff --git a/pygridmet/print_versions.py b/pygridmet/print_versions.py new file mode 100644 index 0000000..ad547a8 --- /dev/null +++ b/pygridmet/print_versions.py @@ -0,0 +1,172 @@ +"""Utility functions for printing version information. 
+ +The original script is from +`xarray `__ +""" +# pyright: reportMissingImports=false +from __future__ import annotations + +import contextlib +import importlib +import importlib.util +import locale +import os +import platform +import struct +import subprocess +import sys +from importlib.metadata import PackageNotFoundError +from importlib.metadata import version as get_version +from pathlib import Path +from typing import TextIO + +__all__ = ["show_versions"] + + +def netcdf_and_hdf5_versions() -> list[tuple[str, str | None]]: + libhdf5_version = None + libnetcdf_version = None + + if importlib.util.find_spec("netCDF4"): + import netCDF4 + + libhdf5_version = netCDF4.__hdf5libversion__ + libnetcdf_version = netCDF4.__netcdf4libversion__ + elif importlib.util.find_spec("h5py"): + import h5py + + libhdf5_version = h5py.version.hdf5_version + + return [("libhdf5", libhdf5_version), ("libnetcdf", libnetcdf_version)] + + +def get_sys_info(): + """Return system information as a dict.""" + blob = [] + + # get full commit hash + commit = None + if Path(".git").is_dir(): + with contextlib.suppress(Exception): + pipe = subprocess.Popen( + 'git log --format="%H" -n 1'.split(" "), # noqa: S603 + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + so, _ = pipe.communicate() + + if pipe.returncode == 0: + commit = so + with contextlib.suppress(ValueError): + commit = so.decode("utf-8") + commit = commit.strip().strip('"') + + blob.append(("commit", commit)) + + with contextlib.suppress(Exception): + (sysname, _, release, _, machine, processor) = platform.uname() + blob.extend( + [ + ("python", sys.version), + ("python-bits", struct.calcsize("P") * 8), + ("OS", f"{sysname}"), + ("OS-release", f"{release}"), + ("machine", f"{machine}"), + ("processor", f"{processor}"), + ("byteorder", f"{sys.byteorder}"), + ("LC_ALL", f'{os.environ.get("LC_ALL", "None")}'), + ("LANG", f'{os.environ.get("LANG", "None")}'), + ("LOCALE", f"{locale.getlocale()}"), + ] + ) + return blob + + +def 
show_versions(file: TextIO = sys.stdout) -> None: + """Print versions of all the dependencies. + + Parameters + ---------- + file : file-like, optional + print to the given file-like object. Defaults to sys.stdout. + """ + deps = [ + # HyRiver packages + "async-retriever", + "pygeoogc", + "pygeoutils", + "py3dep", + "pynhd", + "pygridmet", + "pydaymet", + "hydrosignatures", + "pynldas2", + "pygeohydro", + # async-retriever deps + "aiohttp", + "aiohttp-client-cache", + "aiosqlite", + "cytoolz", + "ujson", + # pygeoogc deps + "defusedxml", + "joblib", + "multidict", + "owslib", + "pyproj", + "requests", + "requests-cache", + "shapely", + "url-normalize", + "urllib3", + "yarl", + # pygeoutils deps + "geopandas", + "netcdf4", + "numpy", + "rasterio", + "rioxarray", + "scipy", + "shapely", + "ujson", + "xarray", + # py3dep deps + "click", + "pyflwdir", + # pynhd deps + "networkx", + "pyarrow", + # pygeohydro deps + "folium", + "h5netcdf", + "matplotlib", + "pandas", + # optional + "numba", + "bottleneck", + "py7zr", + "pyogrio", + ] + pad = len(max(deps, key=len)) + 1 + + deps_blob = {} + for modname in deps: + try: + deps_blob[modname] = get_version(modname) + except PackageNotFoundError: + deps_blob[modname] = "N/A" + except (NotImplementedError, AttributeError): + deps_blob[modname] = "installed" + + print("\nSYS INFO", file=file) + print("--------", file=file) + + for k, stat in get_sys_info(): + print(f"{k}: {stat}", file=file) + + header = f"\n{'PACKAGE':<{pad}} VERSION" + print(header, file=file) + print("-" * len(header), file=file) + for k, stat in deps_blob.items(): + print(f"{k:<{pad}} {stat}", file=file) + print("-" * len(header), file=file) diff --git a/pygridmet/py.typed b/pygridmet/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/pygridmet/pygridmet.py b/pygridmet/pygridmet.py new file mode 100644 index 0000000..b364067 --- /dev/null +++ b/pygridmet/pygridmet.py @@ -0,0 +1,434 @@ +"""Access the GridMet database for both single single 
pixel and gridded queries.""" +# pyright: reportGeneralTypeIssues=false +from __future__ import annotations + +import functools +import io +import itertools +import re +import warnings +from typing import TYPE_CHECKING, Generator, Iterable, Sequence, Union, cast + +import numpy as np +import pandas as pd +import shapely +import xarray as xr + +import async_retriever as ar +import pygeoogc as ogc +import pygeoutils as geoutils +from pygeoogc import ServiceError, ServiceURL +from pygeoutils import Coordinates +from pygridmet.core import T_RAIN, T_SNOW, GridMet +from pygridmet.exceptions import InputRangeError, InputTypeError + +if TYPE_CHECKING: + from pathlib import Path + + import pyproj + from shapely import MultiPolygon, Polygon + + CRSTYPE = Union[int, str, pyproj.CRS] + +DATE_FMT = "%Y-%m-%dT%H:%M:%SZ" +MAX_CONN = 10 + +__all__ = ["get_bycoords", "get_bygeom"] + + +def _coord_urls( + coord: tuple[float, float], + variables: Iterable[str], + dates: list[tuple[pd.Timestamp, pd.Timestamp]], + long_names: dict[str, str], +) -> Generator[list[tuple[str, dict[str, dict[str, str]]]], None, None]: + """Generate an iterable URL list for downloading GridMet data. + + Parameters + ---------- + coord : tuple of length 2 + Coordinates in EPSG:4326 CRS (lon, lat) + variables : list + A list of GridMet variables + dates : list + A list of dates + long_names : dict + A dictionary of long names for the variables. + + Returns + ------- + generator + An iterator of generated URLs. 
+ """ + lon, lat = coord + return ( + [ + ( + f"{ServiceURL().restful.gridmet}/agg_met_{v}_1979_CurrentYear_CONUS.nc", + { + "params": { + "var": long_names[v], + "longitude": f"{lon:0.6f}", + "latitude": f"{lat:0.6f}", + "time_start": s.strftime(DATE_FMT), + "time_end": e.strftime(DATE_FMT), + "accept": "csv", + } + }, + ) + for s, e in dates + ] + for v in variables + ) + + +def _get_lon_lat( + coords: list[tuple[float, float]] | tuple[float, float], + coords_id: Sequence[str | int] | None = None, + crs: CRSTYPE = 4326, + to_xarray: bool = False, +) -> tuple[list[float], list[float]]: + """Get longitude and latitude from a list of coordinates.""" + coords_list = geoutils.coords_list(coords) + + if to_xarray and coords_id is not None and len(coords_id) != len(coords_list): + raise InputTypeError("coords_id", "list with the same length as of coords") + + coords_list = ogc.match_crs(coords_list, crs, 4326) + lon, lat = zip(*coords_list) + return list(lon), list(lat) + + +def _by_coord( + lon: float, + lat: float, + gridmet: GridMet, + dates: list[tuple[pd.Timestamp, pd.Timestamp]], + snow: bool, + snow_params: dict[str, float] | None, + ssl: bool, +) -> pd.DataFrame: + """Get climate data for a coordinate and return as a DataFrame.""" + coords = (lon, lat) + url_kwds = _coord_urls(coords, gridmet.variables, dates, gridmet.long_names) + retrieve = functools.partial(ar.retrieve_text, max_workers=MAX_CONN, ssl=ssl) + + clm = pd.concat( + ( + pd.concat( + pd.read_csv(io.StringIO(r), parse_dates=[0], index_col=[0], usecols=[0, 3]) + for r in retrieve(u, k) + ) + for u, k in (zip(*u) for u in url_kwds) + ), + axis=1, + ) + # Rename the columns from their long names to abbreviations and put the units in parentheses + abbrs = {v: k for k, v in gridmet.long_names.items()} + clm.columns = clm.columns.str.replace(r'\[unit="(.+)"\]', "", regex=True) + clm.columns = clm.columns.map(abbrs).map(lambda x: f"{x} ({gridmet.units[x]})") + + clm = 
clm.set_index(pd.to_datetime(clm.index.strftime("%Y-%m-%d"))) + clm = clm.where(clm < gridmet.missing_value) + + if snow: + params = {"t_rain": T_RAIN, "t_snow": T_SNOW} if snow_params is None else snow_params + clm = gridmet.separate_snow(clm, **params) + clm.index.name = "time" + return clm + + +def get_bycoords( + coords: list[tuple[float, float]] | tuple[float, float], + dates: tuple[str, str] | int | list[int], + coords_id: Sequence[str | int] | None = None, + crs: CRSTYPE = 4326, + variables: Iterable[str] | str | None = None, + snow: bool = False, + snow_params: dict[str, float] | None = None, + ssl: bool = True, + to_xarray: bool = False, +) -> pd.DataFrame | xr.Dataset: + """Get point-data from the GridMet database at 1-km resolution. + + This function uses THREDDS data service to get the coordinates + and supports getting monthly and annual summaries of the climate + data directly from the server. + + Parameters + ---------- + coords : tuple or list of tuples + Coordinates of the location(s) of interest as a tuple (x, y) + dates : tuple or list, optional + Start and end dates as a tuple (start, end) or a list of years ``[2001, 2010, ...]``. + coords_id : list of int or str, optional + A list of identifiers for the coordinates. This option only applies when ``to_xarray`` + is set to ``True``. If not provided, the coordinates will be enumerated. + crs : str, int, or pyproj.CRS, optional + The CRS of the input coordinates, defaults to ``EPSG:4326``. + variables : str or list + List of variables to be downloaded. The acceptable variables are: + ``pr``, ``rmax``, ``rmin``, ``sph``, ``srad``, ``th``, ``tmmn``, ``tmmx``, ``vs``, + ``bi``, ``fm100``, ``fm1000``, ``erc``, ``etr``, ``pet``, and ``vpd``. + Descriptions can be found `here `__. + Defaults to ``None``, i.e., all the variables are downloaded. + snow : bool, optional + Compute snowfall from precipitation and minimum temperature. Defaults to ``False``. 
+ snow_params : dict, optional + Model-specific parameters as a dictionary that is passed to the snowfall function. + These parameters are only used if ``snow`` is ``True``. Two parameters are required: + ``t_rain`` (deg C) which is the threshold for temperature for considering rain and + ``t_snow`` (deg C) which is the threshold for temperature for considering snow. + The default values are ``{'t_rain': 2.5, 't_snow': 0.6}`` that are adopted from + https://doi.org/10.5194/gmd-11-1077-2018. + ssl : bool, optional + Whether to verify SSL certification, defaults to ``True``. + to_xarray : bool, optional + Return the data as an ``xarray.Dataset``. Defaults to ``False``. + + Returns + ------- + pandas.DataFrame or xarray.Dataset + Daily climate data for a single or list of locations. + + Examples + -------- + >>> import pygridmet as gridmet + >>> coords = (-1431147.7928, 318483.4618) + >>> dates = ("2000-01-01", "2000-01-31") + >>> clm = gridmet.get_bycoords( + ... coords, + ... dates, + ... crs=3542, + ... ) + >>> clm["pr (mm)"].mean() + 9.677 + + References + ---------- + .. 
footbibliography:: + """ + gridmet = GridMet(variables, snow) + gridmet.check_dates(dates) + + if isinstance(dates, tuple): + dates_itr = gridmet.dates_tolist(dates) + else: + dates_itr = gridmet.years_tolist(dates) + + lon, lat = _get_lon_lat(coords, coords_id, crs, to_xarray) + points = Coordinates(lon, lat, gridmet.bounds).points + n_pts = len(points) + if n_pts == 0 or n_pts != len(lon): + raise InputRangeError("coords", f"within {gridmet.bounds}") + + by_coord = functools.partial( + _by_coord, + gridmet=gridmet, + dates=dates_itr, + snow=snow, + snow_params=snow_params, + ssl=ssl, + ) + clm_list = itertools.starmap(by_coord, zip(points.x, points.y)) + + idx = list(coords_id) if coords_id is not None else list(range(n_pts)) + if to_xarray: + clm_ds = xr.concat( + (xr.Dataset.from_dataframe(clm) for clm in clm_list), dim=pd.Index(idx, name="id") + ) + clm_ds = clm_ds.rename( + {n: re.sub(r"\([^\)]*\)", "", str(n)).strip() for n in clm_ds.data_vars} + ) + clm_ds["time"] = pd.DatetimeIndex(pd.to_datetime(clm_ds["time"]).date) + for v in clm_ds.data_vars: + clm_ds[v].attrs["units"] = gridmet.units[v] + clm_ds[v].attrs["long_name"] = gridmet.long_names[v] + clm_ds["lat"] = (("id",), points.y) + clm_ds["lon"] = (("id",), points.x) + return clm_ds + + if n_pts == 1: + clm = next(iter(clm_list), pd.DataFrame()) + else: + clm = pd.concat(clm_list, keys=idx, axis=1) + clm = clm.columns.set_names(["id", "variable"]) + clm = clm.set_index(pd.DatetimeIndex(pd.to_datetime(clm.index).date)) + return clm + + +def _gridded_urls( + bounds: tuple[float, float, float, float], + variables: Iterable[str], + dates: list[tuple[pd.Timestamp, pd.Timestamp]], + long_names: dict[str, str], +) -> Generator[tuple[str, dict[str, dict[str, str]]], None, None]: + """Generate an iterable URL list for downloading GridMet data. 
+ + Parameters + ---------- + bounds : tuple of length 4 + Bounding box (west, south, east, north) + variables : list + A list of GridMet variables + dates : list + A list of dates + long_names : dict + A dictionary of long names for the variables. + + Returns + ------- + generator + An iterator of generated URLs. + """ + west, south, east, north = bounds + return ( + ( + f"{ServiceURL().restful.gridmet}/agg_met_{v}_1979_CurrentYear_CONUS.nc", + { + "params": { + "var": long_names[v], + "north": f"{north:0.6f}", + "west": f"{west:0.6f}", + "east": f"{east:0.6f}", + "south": f"{south:0.6f}", + "disableProjSubset": "on", + "horizStride": "1", + "time_start": s.strftime(DATE_FMT), + "time_end": e.strftime(DATE_FMT), + "timeStride": "1", + "accept": "netcdf", + } + }, + ) + for v, (s, e) in itertools.product(variables, dates) + ) + + +def _open_dataset(f: Path) -> xr.Dataset: + """Open a dataset using ``xarray``.""" + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=xr.SerializationWarning) + with xr.open_dataset(f) as ds: + return ds.load() + + +def get_bygeom( + geometry: Polygon | MultiPolygon | tuple[float, float, float, float], + dates: tuple[str, str] | int | list[int], + crs: CRSTYPE = 4326, + variables: Iterable[str] | str | None = None, + snow: bool = False, + snow_params: dict[str, float] | None = None, + ssl: bool = True, +) -> xr.Dataset: + """Get gridded data from the GridMet database at 1-km resolution. + + Parameters + ---------- + geometry : Polygon, MultiPolygon, or bbox + The geometry of the region of interest. + dates : tuple or list, optional + Start and end dates as a tuple (start, end) or a list of years [2001, 2010, ...]. + crs : str, int, or pyproj.CRS, optional + The CRS of the input geometry, defaults to epsg:4326. + variables : str or list + List of variables to be downloaded. 
The acceptable variables are: + ``pr``, ``rmax``, ``rmin``, ``sph``, ``srad``, ``th``, ``tmmn``, ``tmmx``, ``vs``, + ``bi``, ``fm100``, ``fm1000``, ``erc``, ``etr``, ``pet``, and ``vpd``. + Descriptions can be found `here `__. + Defaults to ``None``, i.e., all the variables are downloaded. + snow : bool, optional + Compute snowfall from precipitation and minimum temperature. Defaults to ``False``. + snow_params : dict, optional + Model-specific parameters as a dictionary that is passed to the snowfall function. + These parameters are only used if ``snow`` is ``True``. Two parameters are required: + ``t_rain`` (deg C) which is the threshold for temperature for considering rain and + ``t_snow`` (deg C) which is the threshold for temperature for considering snow. + The default values are ``{'t_rain': 2.5, 't_snow': 0.6}`` that are adopted from + https://doi.org/10.5194/gmd-11-1077-2018. + ssl : bool, optional + Whether to verify SSL certification, defaults to ``True``. + + Returns + ------- + xarray.Dataset + Daily climate data within the target geometry. + + Examples + -------- + >>> from shapely import Polygon + >>> import pygridmet as gridmet + >>> geometry = Polygon( + ... [[-69.77, 45.07], [-69.31, 45.07], [-69.31, 45.45], [-69.77, 45.45], [-69.77, 45.07]] + ... ) + >>> clm = gridmet.get_bygeom(geometry, 2010, variables="tmmn") + >>> clm["tmmn"].mean().item() + 274.167 + + References + ---------- + .. 
footbibliography:: + """ + gridmet = GridMet(variables, snow) + gridmet.check_dates(dates) + + if isinstance(dates, tuple): + dates_itr = gridmet.dates_tolist(dates) + else: + dates_itr = gridmet.years_tolist(dates) + + crs = ogc.validate_crs(crs) + _geometry = geoutils.geo2polygon(geometry, crs, 4326) + + if not _geometry.intersects(shapely.box(*gridmet.bounds)): + raise InputRangeError("geometry", f"within {gridmet.bounds}") + + urls, kwds = zip( + *_gridded_urls( + _geometry.bounds, + gridmet.variables, + dates_itr, + gridmet.long_names, + ) + ) + urls = cast("list[str]", list(urls)) + kwds = cast("list[dict[str, dict[str, str]]]", list(kwds)) + + clm_files = ogc.streaming_download( + urls, + kwds, + file_extention="nc", + ssl=ssl, + n_jobs=MAX_CONN, + ) + try: + # open_mfdataset can run into too many open files error so we use merge + # https://docs.xarray.dev/en/stable/user-guide/io.html#reading-multi-file-datasets + clm = xr.merge(_open_dataset(f) for f in clm_files) + except ValueError as ex: + msg = " ".join( + ( + "GridMet did NOT process your request successfully.", + "Check your inputs and try again.", + ) + ) + raise ServiceError(msg) from ex + + clm = xr.where(clm < gridmet.missing_value, clm, np.nan, keep_attrs=True) + for v in clm.data_vars: + clm[v] = clm[v].rio.write_nodata(np.nan) + clm = geoutils.xd_write_crs(clm, 4326, "spatial_ref") + clm = clm.drop_vars("crs") + clm = cast("xr.Dataset", clm) + clm = geoutils.xarray_geomask(clm, _geometry, 4326) + abbrs = {v: k for k, v in gridmet.long_names.items() if v in clm.data_vars} + abbrs["day"] = "time" + clm = clm.rename(abbrs) + for v in clm.data_vars: + clm[v].attrs["long_name"] = gridmet.long_names[v] + + if snow: + params = {"t_rain": T_RAIN, "t_snow": T_SNOW} if snow_params is None else snow_params + clm = gridmet.separate_snow(clm, **params) + return clm diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..29e1b43 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,273 @@ 
+[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools>=60", + "setuptools-scm>=8", +] + +[project] +name = "pygridmet" +description = "Access daily, monthly, and annual climate data via the Daymet web service." +readme = "README.rst" +license = {text = "MIT"} +authors = [{name = "Taher Chegini", email = "cheginit@gmail.com"}] +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Atmospheric Science", + "Topic :: Scientific/Engineering :: GIS", + "Topic :: Scientific/Engineering :: Hydrology", + "Typing :: Typed", +] +dynamic = [ + "version", +] +dependencies = [ + "async-retriever<0.16,>=0.15.2", + "click>=0.7", + "geopandas>=0.10", + "numpy>=1.21", + "pandas>=1", + "pygeoogc<0.16,>=0.15.2", + "pygeoutils<0.16,>=0.15.2", + "pyproj>=3.0.1", + "shapely>=2", + "xarray>=2023.01", +] +[project.optional-dependencies] +speedup = [ + "numba", +] +test = [ + "pyarrow>=1.0.1", + "pytest-cov", + "pytest-sugar", + "pytest-xdist[psutil]", +] +[project.urls] +CI = "https://github.com/hyriver/pygridmet/actions" +Changelog = "https://docs.hyriver.io/changelogs/pygridmet.html" +Homepage = "https://docs.hyriver.io/readme/pygridmet.html" +Issues = "https://github.com/hyriver/pygridmet/issues" +[project.scripts] +pygridmet = "pygridmet.cli:cli" + +[tool.setuptools] +include-package-data = true +license-files = ["LICENSE"] +platforms = ["any"] +zip-safe = false + +[tool.setuptools.package-data] +pygridmet = ["py.typed"] + 
+[tool.setuptools.packages] +find = {namespaces = false} + +[tool.setuptools_scm] +fallback_version = "999" + +[tool.black] +line-length = 100 + +[tool.ruff] +target-version = "py38" +select = [ + # flake8-bugbear + "B", + # flake8-comprehensions + "C4", + # pydocstyle + "D", + # Error + "E", + # pyflakes + "F", + # isort + "I", + # flake8-implicit-str-concat + "ISC", + # pep8-naming + "N", + # pygrep-hooks + "PGH", + # flake8-pytest-style + "PT", + # flake8-use-pathlib + "PTH", + # flake8-quotes + "Q", + # bandit + "S", + # flake8-simplify + "SIM", + # flake8-print + "T20", + # tryceratops + "TRY", + # pyupgrade + "UP", + # Warning + "W", + # flake8-2020 + "YTT", + # flake8-debugger + "T10", + # flake8-gettext + "INT", + # pylint + "PLC", + "PLE", + "PLR", + "PLW", + # misc lints + "PIE", + # flake8-pyi + "PYI", + # tidy imports + "TID", + # type-checking imports + "TCH", + # Ruff-specific rules + "RUF", +] + +exclude = [ + "__pycache__", + ".nox", +] + +ignore = [ + "D103", + "D105", + "E501", + "PLR2004", + "PLR0913", +] +line-length = 100 + +[tool.ruff.flake8-bugbear] +extend-immutable-calls = [ + "chr", + "typer.Argument", + "typer.Option", +] + +[tool.ruff.pydocstyle] +convention = "numpy" + +[tool.ruff.per-file-ignores] +"tests/*.py" = [ + "D100", + "D101", + "D102", + "D103", + "D104", + "D105", + "D106", + "D107", + # use of "assert" + "S101", + # use of "exec" + "S102", + # possible hardcoded password + "S106", + # use of "eval" + "PGH001", + # Mutable class attributes + "RUF012", +] + +[tool.ruff.isort] +known-first-party = [ + "async_retriever", + "pygeoogc", + "pygeoutils", + "pynhd", + "py3dep", + "hydrosignatures", + "pygeohydro", + "pygridmet", + "pynldas2", +] + +[tool.pycln] +all = true +expand_stars = true +no_gitignore = false +verbose = true + +[tool.codespell] +skip = "__pycache__,_build,.mypy_cache,.git,./htmlcov,.nox,**/us_abbrs.py,cache" +ignore-words-list = "gage,gages,paramss,trough" + +[tool.pytest.ini_options] +addopts = 
"--ignore=noxfile.py -n=auto -v --cov=pygridmet --cov-report xml --durations=5" +doctest_optionflags = 'NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL NUMBER' +filterwarnings = [ + "ignore:.*distutils.*", + "ignore:.*--rsyncdir command line argument.*", + "ignore:.*numpy.ndarray size changed.*", + "ignore:.*'cgi' is deprecated.*", + "ignore:.*Calling float on a single element.*", + "ignore:.*Shapely GEOS version.*", +] +testpaths = [ + "tests", + "pygridmet" +] +markers = [ + "speedup: Tests that require numba", +] + +[tool.coverage.report] +exclude_lines = [ + 'raise ServiceUnavailableError', + "if TYPE_CHECKING:" +] +ignore_errors = true + +[tool.coverage.run] +branch = true +omit = [ + "**/__init__.py", + "**/print_versions.py" +] +parallel = true +source = ['pygridmet'] + +[tool.pyright] +exclude = [ + ".nox/", + "**/__pycache__", + "**/__init__.py", + "tests/" +] +include = ["pygridmet"] +reportMissingTypeStubs = false +reportUnknownArgumentType = false +reportUnknownLambdaType = false +reportUnknownMemberType = false +reportUnknownParameterType = false +reportUnknownVariableType = false +reportUnnecessaryIsInstance = false +reportUntypedFunctionDecorator = false +typeCheckingMode = "strict" + +[tool.fixit] +formatter = "ufmt" + +[tool.refurb] +python_version = "3.8" +quiet = true diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py new file mode 100644 index 0000000..ae9f16e --- /dev/null +++ b/tests/test_exceptions.py @@ -0,0 +1,169 @@ +import shutil +from pathlib import Path + +import geopandas as gpd +import pandas as pd +import pytest +from shapely import Polygon + +import pygridmet as gridmet +from pygridmet import ( + InputRangeError, + InputTypeError, + InputValueError, + MissingCRSError, + MissingItemError, +) +from pygridmet.cli import cli + +GEOM = Polygon( + [[-69.77, 45.07], [-69.31, 45.07], [-69.31, 45.45], [-69.77, 45.45], [-69.77, 45.07]] +) +COORDS = (-69.77, 45.07) +DATES = ("2000-01-01", "2000-12-31") + + +def 
test_invalid_variable(): + with pytest.raises(InputValueError) as ex: + _ = gridmet.get_bycoords(COORDS, DATES, variables="tt") + assert "variables" in str(ex.value) + + +def test_invalid_coords(): + with pytest.raises(InputRangeError) as ex: + _ = gridmet.get_bycoords((0, 0), DATES) + assert "-124.7666" in str(ex.value) + + +def test_invalid_date(): + with pytest.raises(InputRangeError) as ex: + _ = gridmet.get_bycoords(COORDS, ("1950-01-01", "2010-01-01")) + assert "1980" in str(ex.value) + + +def test_invalid_year(): + with pytest.raises(InputRangeError) as ex: + _ = gridmet.get_bycoords(COORDS, 1950) + assert "1980" in str(ex.value) + + +def test_invalid_year_type(): + with pytest.raises(InputTypeError) as ex: + _ = gridmet.get_bycoords(COORDS, "1950") + assert "or int" in str(ex.value) + + +def test_invalid_date_tuple(): + with pytest.raises(InputTypeError) as ex: + _ = gridmet.get_bycoords(COORDS, ("2010-01-01")) + assert "(start, end)" in str(ex.value) + + +class TestCLIFails: + """Test the command-line interface exceptions.""" + + def test_cli_missing_col(self, runner): + params = { + "id": "coords_test", + "lon": -100, + "start": "2000-01-01", + "end": "2000-01-12", + } + coord_csv = "coords_missing_co.csv" + save_dir = "test_cli_missing_col" + df = pd.DataFrame(params, index=[0]) + df.to_csv(coord_csv) + ret = runner.invoke( + cli, + [ + "coords", + coord_csv, + "-s", + save_dir, + ], + ) + Path(coord_csv).unlink() + shutil.rmtree(save_dir, ignore_errors=True) + assert ret.exit_code == 1 + assert isinstance(ret.exception, MissingItemError) + assert "lat" in str(ret.exception) + + def test_wrong_geo_format(self, runner): + params = { + "id": "geo_test", + "start": "2000-01-01", + "end": "2000-05-31", + } + geo_feather = "geo_wrong_format.feather" + save_dir = "test_wrong_geo_format" + gdf = gpd.GeoDataFrame(params, geometry=[GEOM], index=[0], crs=4326) + gdf.to_feather(geo_feather) + ret = runner.invoke( + cli, + [ + "geometry", + geo_feather, + "-s", + 
save_dir, + ], + ) + Path(geo_feather).unlink() + shutil.rmtree(save_dir, ignore_errors=True) + assert ret.exit_code == 1 + assert isinstance(ret.exception, InputTypeError) + assert "gpkg" in str(ret.exception) + + def test_wrong_geo_crs(self, runner): + params = { + "id": "geo_test", + "start": "2000-01-01", + "end": "2000-05-31", + } + geo_gpkg = Path("wrong_geo_crs.gpkg") + save_dir = "test_wrong_geo_crs" + gdf = gpd.GeoDataFrame(params, geometry=[GEOM], index=[0], crs=None) + gdf.to_file(geo_gpkg) + ret = runner.invoke( + cli, + [ + "geometry", + str(geo_gpkg), + "-s", + save_dir, + ], + ) + if geo_gpkg.is_dir(): + shutil.rmtree(geo_gpkg) + else: + geo_gpkg.unlink() + shutil.rmtree(save_dir, ignore_errors=True) + assert ret.exit_code == 1 + assert isinstance(ret.exception, MissingCRSError) + assert "CRS" in str(ret.exception) + + def test_wrong_coords_format(self, runner): + params = { + "id": "coords_test", + "lon": -69.77, + "lat": 45.07, + "start": "2000-01-01", + "end": "2000-12-31", + } + coord_paquet = "wrong_coords_format.paquet" + save_dir = "test_wrong_coords_format" + df = pd.DataFrame(params, index=[0]) + df.to_parquet(coord_paquet) + ret = runner.invoke( + cli, + [ + "coords", + coord_paquet, + "-s", + save_dir, + ], + ) + Path(coord_paquet).unlink() + shutil.rmtree(save_dir, ignore_errors=True) + assert ret.exit_code == 1 + assert isinstance(ret.exception, InputTypeError) + assert "csv" in str(ret.exception) diff --git a/tests/test_pygridmet.py b/tests/test_pygridmet.py new file mode 100644 index 0000000..e3a73de --- /dev/null +++ b/tests/test_pygridmet.py @@ -0,0 +1,142 @@ +"""Tests for PyDaymet package.""" +import io +import shutil +from pathlib import Path + +import cytoolz.curried as tlz +import geopandas as gpd +import numpy as np +import pandas as pd +import pytest +from shapely import Polygon + +import pygridmet as gridmet +from pygridmet.cli import cli + +GEOM = Polygon( + [[-69.77, 45.07], [-69.31, 45.07], [-69.31, 45.45], [-69.77, 45.45], 
[-69.77, 45.07]] +) +DAY = ("2000-01-01", "2000-01-12") +YEAR = 2010 +VAR = ["pr", "tmmn"] +DEF_CRS = 4326 +ALT_CRS = 3542 +COORDS = (-1431147.7928, 318483.4618) +DATES = ("2000-01-01", "2000-12-31") + + +def assert_close(a: float, b: float, rtol: float = 1e-3) -> bool: + assert np.isclose(a, b, rtol=rtol).all() + + +class TestByCoords: + @pytest.mark.speedup() + def test_snow(self): + clm = gridmet.get_bycoords(COORDS, DATES, snow=True, crs=ALT_CRS) + assert_close(clm["snow (mm)"].mean(), 0.0) + + def test_daily(self): + clm = gridmet.get_bycoords(COORDS, DATES, variables=VAR, crs=ALT_CRS) + clm_ds = gridmet.get_bycoords(COORDS, DATES, variables=VAR, crs=ALT_CRS, to_xarray=True) + + expected = 8.8493 + assert_close(clm["pr (mm)"].mean(), expected) + assert_close(clm_ds.pr.mean(), expected) + + +class TestByGeom: + @pytest.mark.speedup() + def test_snow(self): + clm = gridmet.get_bygeom(GEOM, DAY, snow=True, snow_params={"t_snow": 0.5}) + assert_close(clm.snow.mean().item(), 3.4895) + + def test_bounds(self): + clm = gridmet.get_bygeom(GEOM.bounds, DAY) + assert_close(clm.pr.mean().item(), 3.4895) + + def test_daily(self): + clm = gridmet.get_bygeom(GEOM, DAY, variables=VAR) + assert_close(clm.tmmn.mean().item(), 264.0151) + + +class TestCLI: + """Test the command-line interface.""" + + def test_geometry(self, runner): + params = { + "id": "geo_test", + "start": "2000-01-01", + "end": "2000-05-31", + "snow": "false", + } + geo_gpkg = Path("nat_geo.gpkg") + save_dir = "test_geometry" + gdf = gpd.GeoDataFrame(params, geometry=[GEOM], index=[0], crs=DEF_CRS) + gdf.to_file(geo_gpkg) + ret = runner.invoke( + cli, + [ + "geometry", + str(geo_gpkg), + *list(tlz.concat([["-v", v] for v in VAR])), + "-s", + save_dir, + "--disable_ssl", + ], + ) + if geo_gpkg.is_dir(): + shutil.rmtree(geo_gpkg) + else: + geo_gpkg.unlink() + shutil.rmtree(save_dir, ignore_errors=True) + assert str(ret.exception) == "None" + assert ret.exit_code == 0 + assert "Found 1 geometry" in ret.output + 
+ @pytest.mark.speedup() + def test_coords(self, runner): + params = { + "id": "coords_test", + "lon": -69.77, + "lat": 45.07, + "start": DAY[0], + "end": DAY[1], + "snow": "TRUE", + } + coord_csv = "coords.csv" + save_dir = "test_coords" + df = pd.DataFrame(params, index=[0]) + df.to_csv(coord_csv, index=False) + ret = runner.invoke( + cli, + [ + "coords", + coord_csv, + *list(tlz.concat([["-v", v] for v in VAR])), + "-s", + save_dir, + "--disable_ssl", + ], + ) + runner.invoke( + cli, + [ + "coords", + coord_csv, + *list(tlz.concat([["-v", v] for v in VAR])), + "-s", + save_dir, + "--disable_ssl", + ], + ) + Path(coord_csv).unlink() + shutil.rmtree(save_dir, ignore_errors=True) + assert str(ret.exception) == "None" + assert ret.exit_code == 0 + assert "Found coordinates of 1 point" in ret.output + + +def test_show_versions(): + f = io.StringIO() + gridmet.show_versions(file=f) + assert "SYS INFO" in f.getvalue()