diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8c9d282 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.pyc +.git +.ipynb_checkpoints +.DS_Store +*.swp diff --git a/README.md b/README.md index 2f0fecf..890f7d8 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,43 @@ # Description -This repository contains scripts to process necessary GeoTIFF datasets. The general usage of the script (i.e., `./extract-geotiff.sh`) is as follows: +This repository contains scripts to process necessary geospatial datasets and implement efficient zonal statistics on given ESRI Shapefiles. The general usage of the script (i.e., `./extract-gis.sh`) is as follows: ```console Usage: - extract-geotiff [options...] + extract-gis [options...] Script options: - -d, --dataset GeoTIFF dataset of interest, - currently available options are: - 'MODIS';'MERIT-Hydro';'SoilGridsV1'; - 'SoilGridsV2'; + -d, --dataset Geospatial dataset of interest, currently + available options are: 'MODIS'; + 'MERIT-Hydro';'SoilGridsV1' -i, --dataset-dir=DIR The source path of the dataset file(s) + -r, --crs=INT The EPSG code of interest; optional + [defaults to 4326] -v, --variable=var1[,var2[...]] If applicable, variables to process -o, --output-dir=DIR Writes processed files to DIR - -s, --start-date=DATE If applicable, start date of the GeoTIFF + -s, --start-date=DATE If applicable, start date of the geospatial data; optional - -e, --end-date=DATE If applicable, end date of the GeoTIFF + -e, --end-date=DATE If applicable, end date of the geospatial data; optional -l, --lat-lims=REAL,REAL Latitude's upper and lower bounds; optional -n, --lon-lims=REAL,REAL Longitude's upper and lower bounds; optional - -p, --shape-file=PATH Path to the ESRI '.shp' file; optional + -f, --shape-file=PATH Path to the ESRI '.shp' file; optional -j, --submit-job Submit the data extraction process as a job on the SLURM system; optional - -t, --stats=stat1[,stat2[...]] If applicable, extract the statistics of + -t, --print-geotiff=BOOL Extract the subsetted GeoTIFF file; optional + [defaults to 'true'] + -a, --stat=stat1[,stat2[...]] If applicable, extract the statistics of interest, currently available options are: 'min';'max';'mean';'majority';'minority'; - 'median';'quantiles';'variety';'variance'; + 'median';'quantile';'variety';'variance'; 'stdev';'coefficient_of_variation';'frac'; + optional + -q, --quantile=q1[,q2[...]] Quantiles of interest to be produced if 'quantile' + is included in the '--stat' argument. 
The values
                                  must be comma-delimited float numbers between
                                  0 and 1; optional [defaults to 0.25, 0.50,
                                  and 0.75]
  -p, --prefix=STR                Prefix prepended to the output files
  -c, --cache=DIR                 Path of the cache directory; optional
-  -E, --email=STR                E-mail when job starts, ends, and
+  -E, --email=STR                E-mail when job starts, ends, or
+                                 fails; optional
  -V, --version                   Show version
  -h, --help                      Show this screen and exit

@@ -37,32 +45,35 @@ Script options:

# Available Datasets
-|**#**|Dataset                                     |Time Scale            |DOI                    |Description          |
-|-----|--------------------------------------------|----------------------|-----------------------|---------------------|
-|**1**|MODIS                                       |2000 - 2021           |                       |[link](modis)        |
-|**2**|MERIT Hydro                                 |Not Applicable (N/A)  |10.1029/2019WR024873   |[link](merit_hydro)  |
-|**3**|Soil Grids (v1)                             |N/A                   |*ditto*                |[link](soil_grids_v1)|
-|**4**|Soil Grids (v2)                             |N/A                   |*ditto*                |[link](soil_grids_v2)|
+|**#**|Dataset                                     |Time Scale            |CRS  |DOI                            |Description          |
+|-----|--------------------------------------------|----------------------|-----|-------------------------------|---------------------|
+|**1**|MODIS                                       |2000 - 2021           |     |10.5067/MODIS/MCD12Q1.006      |[link](modis)        |
+|**2**|MERIT Hydro                                 |Not Applicable (N/A)  |4326 |10.1029/2019WR024873           |[link](merit_hydro)  |
+|**3**|Soil Grids (v1)                             |Not Applicable (N/A)  |4326 |10.1371/journal.pone.0169748   |[link](soil_grids)   |

# General Example
As an example, follow the code block below. Please remember that you MUST have access to the Graham cluster of Compute Canada (CC) and to the `MERIT-Hydro` dataset. Also, remember to generate a [Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) with GitHub in advance. Enter the following commands in your Graham shell as a test case:

```console
-foo@bar:~$ git clone https://github.com/kasra-keshavarz/geotifftool # clone the repository
-foo@bar:~$ cd ./geotifftool/ # always move to the repository's directory
-foo@bar:~$ ./extract-geotiff.sh -h # view the usage message
-foo@bar:~$ ./extract-geotiff.sh --geotiff="MERIT-Hydro" \
-             --dataset-dir="/project/rpp-kshook/Model_Output/WRF/CONUS/CTRL" \
-             --output-dir="$HOME/scratch/conus_i_output/" \
-             --start-date="2001-01-01 00:00:00" \
-             --end-date="2001-12-31 23:00:00" \
-             --lat-lims=49,51 \
-             --lon-lims=-117,-115 \
-             --variable=T2,PREC_ACC_NC,Q2,ACSWDNB,ACLWDNB,U10,V10,PSFC \
-             --prefix="conus_i";
+foo@bar:~$ git clone https://github.com/kasra-keshavarz/gistool # clone the repository
+foo@bar:~$ cd ./gistool/ # always move to the repository's directory
+foo@bar:~$ ./extract-gis.sh -h # view the usage message
+foo@bar:~$ wget -m -nd -nv -q -A "cat_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.*" \
+           "http://hydrology.princeton.edu/data/mpan/MERIT_Basins/MERIT_Hydro_v07_Basins_v01_bugfix1/pfaf_level_02/";
+           # downloading a sample shapefile
+foo@bar:~$ ./extract-gis.sh --dataset="merit-hydro" \
+             --dataset-dir="/project/rpp-kshook/CompHydCore/merit_hydro/raw_data/" \
+             --output-dir="$HOME/scratch/merit-hydro-test" \
+             --shape-file="./cat_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.shp" \
+             --print-geotiff=true \
+             --stat="min,max,mean,median,quantile" \
+             --quantile="0.1,0.5,0.9" \
+             --variable="elv,hnd" \
+             --prefix="merit_test_";
```

-See the [example](./example) directory for real-world scripts for each GeoTIFF dataset included in this repository.
+See the [example](./example) directory for real-world scripts for each geospatial dataset included in this repository.
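+
+Once the call above finishes, the output directory is expected to hold one
+subsetted GeoTIFF per variable (because of `--print-geotiff=true`) and one
+zonal-statistics CSV per variable; each CSV contains one row per shapefile
+feature, namely the shapefile's first attribute column followed by the
+requested statistics. A sketch of the expected layout (file names follow the
+`--prefix` and `--variable` values, so this listing is illustrative only):
+
+```console
+foo@bar:~$ ls "$HOME/scratch/merit-hydro-test"
+merit_test_elv.tif  merit_test_stats_elv.csv  merit_test_stats_elv.log
+merit_test_hnd.tif  merit_test_stats_hnd.csv  merit_test_stats_hnd.log
+```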
# New Datasets @@ -74,7 +85,7 @@ Please open a new ticket on the **Issues** tab of the current repository in case # License -GeoTIFF Processing Workflow
+Geospatial Dataset Processing Workflow
Copyright (C) 2022, University of Saskatchewan
This program is free software: you can redistribute it and/or modify diff --git a/assets/README.md b/assets/README.md new file mode 100644 index 0000000..5692979 --- /dev/null +++ b/assets/README.md @@ -0,0 +1,4 @@ +# Description + +This directory contains two main files: 1) `renv.lock` containing necessary meta-data for the R `renv` package to set up the environment/libraries necessary for running the [`exactextractr`](https://github.com/isciences/exactextractr) package that implements zonal statistics and 2) `stats.R` that calls [`exactextractr`](https://github.com/isciences/exactextractr) package after the R environment has been fully set up. + diff --git a/assets/renv.lock b/assets/renv.lock new file mode 100755 index 0000000..fdcae00 --- /dev/null +++ b/assets/renv.lock @@ -0,0 +1,310 @@ +{ + "R": { + "Version": "4.1.2", + "Repositories": [ + { + "Name": "CRAN", + "URL": "https://cloud.r-project.org" + } + ] + }, + "Packages": { + "DBI": { + "Package": "DBI", + "Version": "1.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "dcd1743af4336156873e3ce3c950b8b9", + "Requirements": [] + }, + "KernSmooth": { + "Package": "KernSmooth", + "Version": "2.23-20", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8dcfa99b14c296bc9f1fd64d52fd3ce7", + "Requirements": [] + }, + "MASS": { + "Package": "MASS", + "Version": "7.3-54", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0e59129db205112e3963904db67fd0dc", + "Requirements": [] + }, + "Matrix": { + "Package": "Matrix", + "Version": "1.3-4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4ed05e9c9726267e4a5872e09c04587c", + "Requirements": [ + "lattice" + ] + }, + "Rcpp": { + "Package": "Rcpp", + "Version": "1.0.8.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "32e79b908fda56ee57fe518a8d37b864", + "Requirements": [] + }, + "boot": { + "Package": "boot", + "Version": "1.3-28", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0baa960e3b49c6176a4f42addcbacc59", + "Requirements": [] + }, + "class": { + "Package": "class", + "Version": "7.3-19", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "1593b7beb067c8381c0d24e38bd778e0", + "Requirements": [ + "MASS" + ] + }, + "classInt": { + "Package": "classInt", + "Version": "0.4-7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9c04a89713e4c4f9c0f3bf9ef928f852", + "Requirements": [ + "KernSmooth", + "class", + "e1071" + ] + }, + "cluster": { + "Package": "cluster", + "Version": "2.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ce49bfe5bc0b3ecd43a01fe1b01c2243", + "Requirements": [] + }, + "codetools": { + "Package": "codetools", + "Version": "0.2-18", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "019388fc48e48b3da0d3a76ff94608a8", + "Requirements": [] + }, + "e1071": { + "Package": "e1071", + "Version": "1.7-11", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fc02aa712f8600f7da17f44989646a21", + "Requirements": [ + "class", + "proxy" + ] + }, + "exactextractr": { + "Package": "exactextractr", + "Version": "0.9.0", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteUsername": "isciences", + "RemoteRepo": "exactextractr", + "RemoteRef": "6d02446438991c3d6a8017474200aaf23fc7a695", + "RemoteSha": "6d02446438991c3d6a8017474200aaf23fc7a695", + "Hash": "c22e1f368bc1d3083f3ad82ee8134344", + "Requirements": [ + "Rcpp", + "raster", + "sf" + ] + }, + "foreign": { + "Package": "foreign", + "Version": "0.8-81", + "Source": 
"Repository", + "Repository": "CRAN", + "Hash": "74628ea7a3be5ee8a7b5bb0a8e84882e", + "Requirements": [] + }, + "lattice": { + "Package": "lattice", + "Version": "0.20-45", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b64cdbb2b340437c4ee047a1f4c4377b", + "Requirements": [] + }, + "magrittr": { + "Package": "magrittr", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7ce2733a9826b3aeb1775d56fd305472", + "Requirements": [] + }, + "mgcv": { + "Package": "mgcv", + "Version": "1.8-38", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "be3c61ffbb1e3d3b3df214d192ac5444", + "Requirements": [ + "Matrix", + "nlme" + ] + }, + "nlme": { + "Package": "nlme", + "Version": "3.1-153", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2d632e0d963a653a0329756ce701ecdd", + "Requirements": [ + "lattice" + ] + }, + "nnet": { + "Package": "nnet", + "Version": "7.3-16", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3a3dc184000bc9e6c145c4dbde4dd702", + "Requirements": [] + }, + "proxy": { + "Package": "proxy", + "Version": "0.4-27", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e0ef355c12942cf7a6b91a6cfaea8b3e", + "Requirements": [] + }, + "raster": { + "Package": "raster", + "Version": "3.5-15", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "70cf51235e017702ed77427797572ef0", + "Requirements": [ + "Rcpp", + "sp", + "terra" + ] + }, + "renv": { + "Package": "renv", + "Version": "0.15.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6a38294e7d12f5d8e656b08c5bd8ae34", + "Requirements": [] + }, + "rgdal": { + "Package": "rgdal", + "Version": "1.5-32", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7e8f18c3a55c90a44ea05e0620c05a7e", + "Requirements": [ + "sp" + ] + }, + "rpart": { + "Package": "rpart", + "Version": "4.1-15", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9787c1fcb680e655d062e7611cadf78e", + "Requirements": [] + }, + "s2": { + "Package": "s2", + "Version": "1.0.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "cedf9ef196a446ee8080c14267e69410", + "Requirements": [ + "Rcpp", + "wk" + ] + }, + "sf": { + "Package": "sf", + "Version": "1.0-7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d237b77320537f7793c7a4f83267cf50", + "Requirements": [ + "DBI", + "Rcpp", + "classInt", + "magrittr", + "s2", + "units" + ] + }, + "sp": { + "Package": "sp", + "Version": "1.5-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2a118bdd2db0a301711e0a9e3e3206d5", + "Requirements": [ + "lattice" + ] + }, + "spatial": { + "Package": "spatial", + "Version": "7.3-14", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a7341b0231ca84a0323982d5ae5a4300", + "Requirements": [] + }, + "survival": { + "Package": "survival", + "Version": "3.2-13", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6f0a0fadc63bc6570fe172770f15bbc4", + "Requirements": [ + "Matrix" + ] + }, + "terra": { + "Package": "terra", + "Version": "1.5-34", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c3401c4598d3c2d6158e10c966d70cbd", + "Requirements": [ + "Rcpp" + ] + }, + "units": { + "Package": "units", + "Version": "0.8-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6c374b265ba437f8d384ec7a313edd96", + "Requirements": [ + "Rcpp" + ] + }, + "wk": { + "Package": "wk", + "Version": "0.6.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d6bc0432436a0aefbf29d5de8588b876", + "Requirements": [] + 
} + } +} diff --git a/assets/stats.R b/assets/stats.R new file mode 100755 index 0000000..05f74ee --- /dev/null +++ b/assets/stats.R @@ -0,0 +1,50 @@ +# Geospatial Dataset Processing Workflow +# Copyright (C) 2022, University of Saskatchewan +# +# This file is part of the Geospatial Dataset Processing Workflow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# reading arguments +args <- commandArgs(); + +# assigning variables to input arguments +exactextractr_cache_path <- args[6]; +renv_source_package <- args[7]; +virtual_env_path <- args[8]; +working_dir_path <- args[9]; +lockfile_path <- args[10]; +vrt_path <- args[11]; +shapefile_path <- args[12]; +output_path <- args[13]; +stats <- args[14]; +quantiles <- args[15]; + +# set the working directory path +setwd(working_dir_path) + +# set cache and initialize the environment, i.e., `renv` +Sys.setenv("RENV_PATHS_CACHE"=exactextractr_cache_path); +install.packages(renv_source_package, repos=NULL, type="source", quiet=TRUE); +renv::activate(virtual_env_path); +renv::restore(lockfile=lockfile_path, prompt=FALSE); + +# produce necessary stats and print a csv file +r <- raster::raster(vrt_path); +p <- sf::st_read(shapefile_path, quiet=TRUE); +q <- as.double(unlist(strsplit(quantiles, ","))); +s <- unlist(strsplit(stats, ",")); +df <- cbind(p[[1]], exactextractr::exact_extract(r, p, s, quantiles=q)); # assuming first column indicates ID +write.csv(df, output_path, row.names=FALSE, quote=FALSE) + diff --git a/example/README.md b/example/README.md new file mode 100644 index 0000000..51dc2d8 --- /dev/null +++ b/example/README.md @@ -0,0 +1,3 @@ +# Description + +This directory contains examples of subsetting and implementing zonal statistics for the included geospatial datasets of this repository via the `gistool` scripts. Click on each file to view the relevant example. diff --git a/example/merit-hydro.sh b/example/merit-hydro.sh new file mode 100755 index 0000000..c9137e3 --- /dev/null +++ b/example/merit-hydro.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Geospatial Dataset Processing Workflow +# Copyright (C) 2022, University of Saskatchewan +# +# This file is part of the Geospatial Dataset Processing Workflow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# This is a simple example to extract common statistics for the +# pfaf 71 - Saskatchewan-Nelson System Shapefiles of MERIT-Basins - +# from the MERIT-Hydro rasters. 
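+# NOTE: the `-j` flag at the end of the call below submits the extraction as a
+# SLURM job (see `--submit-job` in the usage message); drop it to run the
+# process interactively. The `--quantile` values given here are only an
+# example: any comma-delimited floats between 0 and 1 are accepted.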
+ +# Always call the script in the root directory of this repository +cd .. +echo "The current directory is: $(pwd)" + +# first download a sample shapefile - could be any shapefile +wget -m -nd -nv -q -A "cat_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.*" \ + "http://hydrology.princeton.edu/data/mpan/MERIT_Basins/MERIT_Hydro_v07_Basins_v01_bugfix1/pfaf_level_02/"; + +# implement subsetting and zonal statistics +./extract-gis.sh --dataset="merit-hydro" \ + --dataset-dir="/project/rpp-kshook/CompHydCore/merit_hydro/raw_data/" \ + --output-dir="$HOME/scratch/merit-hydro-test" \ + --shape-file="$(pwd)/cat_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.shp" \ + --print-geotiff=true \ + --stat="min,max,mean,median,quantile" \ + --quantile="0.1,0.5,0.9" \ + --variable="elv,hnd" \ + --prefix="merit_test_" \ + -j; + diff --git a/example/modis.sh b/example/modis.sh new file mode 100755 index 0000000..93d78b1 --- /dev/null +++ b/example/modis.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# Geospatial Dataset Processing Workflow +# Copyright (C) 2022, University of Saskatchewan +# +# This file is part of the Geospatial Dataset Processing Workflow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# This is a simple example to extract common statistics for the +# pfaf 71 - Saskatchewan-Nelson System Shapefiles of MERIT-Basins - +# from the MERIT-Hydro rasters. + +# Always call the script in the root directory of this repository +cd .. +echo "The current directory is: $(pwd)" + +# first download a sample shapefile - could be any shapefile +wget -m -nd -nv -q -A "cat_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.*" \ + "http://hydrology.princeton.edu/data/mpan/MERIT_Basins/MERIT_Hydro_v07_Basins_v01_bugfix1/pfaf_level_02/"; + +# implement subsetting and zonal statistics +./extract-gis.sh --dataset="modis" \ + --dataset-dir="/project/rpp-kshook/Model_Output/MODIS/" \ + --variable="MCD12Q1.006" \ + --start-date="2001-01-01" \ + --end-date="2002-01-01" \ + --shape-file="$(pwd)/cat_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.shp" \ + --print-geotiff=true \ + --output-dir="$HOME/scratch/modis-test/" \ + --prefix="test_" \ + --stat="majority,minority,frac" \ + -j; + diff --git a/example/soil-grids.sh b/example/soil-grids.sh new file mode 100755 index 0000000..b6bc088 --- /dev/null +++ b/example/soil-grids.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Geospatial Dataset Processing Workflow +# Copyright (C) 2022, University of Saskatchewan +# +# This file is part of the Geospatial Dataset Processing Workflow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# This is a simple example to extract common statistics for the +# pfaf 71 - Saskatchewan-Nelson System Shapefiles of MERIT-Basins - +# from the MERIT-Hydro rasters. + +# Always call the script in the root directory of this repository +cd .. +echo "The current directory is: $(pwd)" + +# first download a sample shapefile - could be any shapefile +wget -m -nd -nv -q -A "cat_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.*" \ + "http://hydrology.princeton.edu/data/mpan/MERIT_Basins/MERIT_Hydro_v07_Basins_v01_bugfix1/pfaf_level_02/"; + +# implement subsetting and zonal statistics +./extract-gis.sh --dataset="soil-grids-v1" \ + --dataset-dir="/project/rpp-kshook/Model_Output/SoilGridsV1/soilgrids/former/2017-03-10/data" \ + --variable="BDRICM_M_250m_ll,BLDFIE_M_sl4_250m_ll" \ + --shape-file="$(pwd)/cat_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.shp" \ + --print-geotiff=true \ + --output-dir="$HOME/scratch/soil-grids-test/" \ + --prefix="test_" \ + --stat="mean,min,max,median,quantile" \ + --quantile="0.1,0.5,0.9" \ + -j; + diff --git a/extract-geotiff.sh b/extract-gis.sh similarity index 52% rename from extract-geotiff.sh rename to extract-gis.sh index 903d582..ebcb309 100755 --- a/extract-geotiff.sh +++ b/extract-gis.sh @@ -35,40 +35,56 @@ # Help functions # ============== usage () { - echo "GeoTIFF Processing Script + echo "GISTOOL: Geospatial Dataset Processing Script Usage: - $(basename $0) [options...] + extract-gis [options...] Script options: - -g, --geotiff GeoTIFF dataset of interest - currently available options are: - 'MERITHydro';'SoilGridsv1';'SoilGridsv2'; - 'MODIS'; - -i, --geotiff-dir=DIR The source path of the GeoTIFF file(s) - -v, --variable=var1[,var2[...]] Variables to process + -d, --dataset Geospatial dataset of interest, currently + available options are: 'MODIS'; + 'MERIT-Hydro';'SoilGridsV1' + -i, --dataset-dir=DIR The source path of the dataset file(s) + -r, --crs=INT The EPSG code of interest; optional + [defaults to 4326] + -v, --variable=var1[,var2[...]] If applicable, variables to process -o, --output-dir=DIR Writes processed files to DIR - -s, --start-date=DATE The start date of the data, if applicable - -e, --end-date=DATE The end date of the data, if applicable - -l, --lat-lims=REAL,REAL Latitude's upper and lower bounds - -n, --lon-lims=REAL,REAL Longitude's upper and lower bounds + -s, --start-date=DATE If applicable, start date of the geospatial + data; optional + -e, --end-date=DATE If applicable, end date of the geospatial + data; optional + -l, --lat-lims=REAL,REAL Latitude's upper and lower bounds; optional + -n, --lon-lims=REAL,REAL Longitude's upper and lower bounds; optional + -f, --shape-file=PATH Path to the ESRI '.shp' file; optional -j, --submit-job Submit the data extraction process as a job on the SLURM system; optional - -k, --no-chunk No parallelization, recommended for small domains + -t, --print-geotiff=BOOL Extract the subsetted GeoTIFF file; optional + [defaults to 'true'] + -a, --stat=stat1[,stat2[...]] If applicable, extract the statistics of + interest, currently available options are: + 'min';'max';'mean';'majority';'minority'; + 'median';'quantile';'variety';'variance'; + 'stdev';'coefficient_of_variation';'frac'; + optional + -q, --quantile=q1[,q2[...]] Quantiles of interest to be produced if 'quantile' + is included in the '--stat' argument. 
The values
                                  must be comma-delimited float numbers between
                                  0 and 1; optional [defaults to 0.25, 0.50,
                                  and 0.75]
  -p, --prefix=STR                Prefix prepended to the output files
  -c, --cache=DIR                 Path of the cache directory; optional
-  -E, --email=user@example.com   E-mail user when job starts, ends, and finishes; optional
+  -E, --email=STR                E-mail when job starts, ends, or
+                                 fails; optional
  -V, --version                   Show version
  -h, --help                      Show this screen and exit

For bug reports, questions, discussions open an issue
-at https://github.com/kasra-keshavarz/geotifftool/issues" >&1;
+at https://github.com/kasra-keshavarz/gistool/issues" >&1;

  exit 0;
}

short_usage () {
-  echo "usage: $(basename $0) [-jh] [-i DIR] [-g DATASET] [-co DIR] [-se DATE] [-ln REAL,REAL] [-p STR]" >&1;
+  echo "usage: $(basename $0) -d DATASET -io DIR -v var1[,var2,[...]] [-jVhE] [-t BOOL] [-c DIR] [-se DATE] [-r INT] [-ln REAL,REAL] [-f PATH] [-p STR] [-a stat1[,stat2,[...]] [-q q1[,q2[...]]]]" >&1;
}

version () {
@@ -91,7 +107,7 @@ shopt -s expand_aliases
# Parsing input arguments
# =======================
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n extract-geotiff -o jhVE:g:i:v:o:s:e:t:l:n:p:c:m:k --long submit-job,help,version,email:,geotiff:,geotiff-dir:,variable:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,no-chunk -- "$@")
+parsedArguments=$(getopt -a -n extract-gis -o d:i:r:v:o:s:e:l:n:f:jt:a:q:p:c:EVh --long dataset:,dataset-dir:,crs:,variable:,output-dir:,start-date:,end-date:,lat-lims:,lon-lims:,shape-file:,submit-job,print-geotiff:,stat:,quantile:,prefix:,cache:,email:,version,help -- "$@")
validArguments=$?
# check if there is no valid options
if [ "$validArguments" != "0" ]; then
@@ -110,23 +126,25 @@ eval set -- "$parsedArguments"
while :
do
  case "$1" in
-    -h | --help)          usage ;              shift   ;; # optional
-    -V | --version)       version ;            shift   ;; # optional
-    -j | --submit-job)    jobSubmission=true ; shift   ;; # optional
-    -E | --email)         email="$2" ;         shift 2 ;; # optional
-    -i | --geotiff-dir)   geotiffDir="$2" ;    shift 2 ;; # required
-    -g | --geotiff)       geotiff="$2" ;       shift 2 ;; # required
+    -d | --dataset)       geotiff="$2" ;       shift 2 ;; # required
+    -i | --dataset-dir)   geotiffDir="$2" ;    shift 2 ;; # required
+    -r | --crs)           crs="$2" ;           shift 2 ;; # optional
     -v | --variable)      variables="$2" ;     shift 2 ;; # required
     -o | --output-dir)    outputDir="$2" ;     shift 2 ;; # required
     -s | --start-date)    startDate="$2" ;     shift 2 ;; # required
-    -e | --end-date)      endDate="$2" ;       shift 2 ;; # required
-    -t | --time-scale)    timeScale="$2" ;     shift 2 ;; # required
-    -l | --lat-lims)      latLims="$2" ;       shift 2 ;; # required
-    -n | --lon-lims)      lonLims="$2" ;       shift 2 ;; # required
-    -m | --ensemble)      ensemble="$2" ;      shift 2 ;; # optional
-    -k | --no-chunk)      parallel=false ;     shift   ;; # optional
+    -e | --end-date)      endDate="$2" ;       shift 2 ;; # optional
+    -l | --lat-lims)      latLims="$2" ;       shift 2 ;; # optional
+    -n | --lon-lims)      lonLims="$2" ;       shift 2 ;; # optional
+    -f | --shape-file)    shapefile="$2" ;     shift 2 ;; # optional
+    -j | --submit-job)    jobSubmission=true ; shift   ;; # optional
+    -t | --print-geotiff) printGeotiff="$2" ;  shift 2 ;; # optional
+    -a | --stat)          stats="$2" ;         shift 2 ;; # optional
+    -q | --quantile)      quantiles="$2" ;     shift 2 ;; # optional
     -p | --prefix)        prefixStr="$2" ;     shift 2 ;; # required
     -c | --cache)         cache="$2" ;         shift 2 ;; # optional
+    -E | --email)         email="$2" ;         shift 2 ;; # optional
+    -V | --version)       version ;            shift   ;; # optional
+    -h | --help)          usage ;              shift   ;; # optional
    # -- means the end of the arguments; drop this, and break out of the while loop
    --) shift; break ;;

@@ -143,8 +161,6 @@ if [[ -z "${geotiffDir}" ]] || \
   [[ -z "${geotiff}" ]] || \
   [[ -z "${variables}" ]] || \
   [[ -z "${outputDir}" ]] || \
-  [[ -z "${latLims}" ]] || \
-  [[ -z "${lonLims}" ]] || \
   [[ -z "${prefixStr}" ]]; then

  echo "$(basename $0): mandatory option(s) missing.";
@@ -152,9 +168,26 @@ if [[ -z "${geotiffDir}" ]] || \
  exit 1;
fi

-# default value for timeScale if not provided as an argument
-if [[ -z $timeScale ]]; then
-  timeScale="M"
+# if $printGeotiff is not triggered
+if [[ -z $printGeotiff ]]; then
+  printGeotiff=true
+fi
+
+# check the value of $printGeotiff
+if [[ -n $printGeotiff ]]; then
+  case "${printGeotiff,,}" in
+    "true" | "1" )
+      printGeotiff="true"
+      ;;
+
+    "false" | "0" )
+      printGeotiff="false"
+      ;;
+
+    *)
+      echo "$(basename $0): invalid value for '--print-geotiff', continuing with default value of 'true'"
+      printGeotiff="true"
+      ;;
+  esac
fi

# default value for cache path if not provided as an argument
@@ -164,84 +197,65 @@ elif [[ -z $cache ]]; then
  cache="$HOME/scratch/.temp_data_$(date +"%N")"
fi

-# default value for parallelization
-if [[ -z $parallel ]]; then
-  parallel=true
-fi
-
# email without job submission is not allowed
if [[ -n $email ]] && [[ -z $jobSubmission ]]; then
  echo "$(basename $0): Email is not supported without job submission;"
  echo "$(basename $0): Continuing without email notification..."
fi

+# either shapefile or spatial extents arguments are allowed
+if [[ -n $shapefile ]] && [[ -n $latLims ]]; then
+  echo "$(basename $0): ERROR! Either shapefile or spatial extents should be entered"
+  exit 1;
+elif [[ -n $shapefile ]] && [[ -n $lonLims ]]; then
+  echo "$(basename $0): ERROR! Either shapefile or spatial extents should be entered"
+  exit 1;
+fi

-# ===========================
-# Quasi-parallel requirements
-# ===========================
-# necessary arrays
-startDateArr=() # start dates array
-endDateArr=() # end dates array
-
-# necessary one-liner functions
-unix_epoch () { date --date="$@" +"%s"; } # unix EPOCH command
-format_date () { date --date="$1" +"$2"; } # format date
-
-# default date format
-dateFormat="%Y-%m-%d %H:%M:%S"
-
-chunk_dates () {
-  # local variables
-  local toDate="$startDate"
-  local tStep="$1"
-  local midDate
-  local toDateEnd
-
-  # if no chunking
-  if [[ "$parallel" == "false" ]]; then
-    startDateArr+=("$(format_date "$startDate" "$dateFormat")")
-    endDateArr+=("$(format_date "$endDate" "$dateFormat")")
-    return # exit the function
-
-  # if chunking
-  elif [[ "$parallel" == "true" ]]; then
-
-    while [[ "$(unix_epoch "$toDate")" -le "$(unix_epoch "$endDate")" ]]; do
-      midDate="$(format_date "$toDate" "%Y-%m-01")"
-      toDateEnd="$(format_date "$midDate $tStep -1hour" "$dateFormat")"
-
-      # considering last month if not fully being a $tStep
-      if [[ "$(unix_epoch "$toDateEnd")" -ge "$(unix_epoch "$endDate")" ]]; then
-        startDateArr+=("$(format_date "$toDate" "$dateFormat")")
-        endDateArr+=("$(format_date "$endDate" "$dateFormat")")
-        break # break the while loop
-      fi
-
-      startDateArr+=("$(format_date "$toDate" "$dateFormat")")
-      endDateArr+=("$(format_date "$toDateEnd" "$dateFormat")")
-
-      toDate=$(date --date="$midDate $tStep")
-    done
-  fi
-}

+# if no crs is entered, assign the default value of EPSG 4326
+if [[ -z $crs ]]; then
+  crs=4326
+fi
+
+# at least $printGeotiff=true or $stats=stat1[,stat2[...]] must be provided
+if [[ "${printGeotiff,,}" == "false" ]] && [[ -z $stats ]]; then
+  echo "$(basename $0): ERROR!
At minimum, either of '--print-geotiff' or '--stats' must be provided" + exit 1; +fi + +# if quantile is not given in '--stat' but '--quantile' is provided +if [[ "$stats" != *"quantile"* ]] && [[ -n $quantiles ]]; then + echo "$(basename $0): ERROR! 'quantile' stat is not provided in '--stat' while '--quantile' argument is filled" + exit 1; +fi + +# if quantile is given in '--stat' but '--quantile' is not provided +if [[ "$stats" == *"quantile"* ]] && [[ -z $quantiles ]]; then + echo "$(basename $0): Warning! 'quantile' stat is provided in '--stat' while '--quantile' is not filled;" + echo "$(basename $0): Continuing with default values of 25th, 50th, and 75th quantiles" + quantiles="0.25,0.50,0.75" +fi # ====================== -# Necessary preparations +# Necessary Preparations # ====================== # put necessary arguments in an array - just for legibility -declare -A funcArgs=([jobSubmission]="$jobSubmission" \ - [geotiffDir]="$geotiffDir" \ +declare -A funcArgs=([geotiffDir]="$geotiffDir" \ + [crs]="$crs" \ [variables]="$variables" \ [outputDir]="$outputDir" \ - [timeScale]="$timeScale" \ [startDate]="$startDate" \ [endDate]="$endDate" \ [latLims]="$latLims" \ [lonLims]="$lonLims" \ + [shapefile]="$shapefile" \ + [jobSubmission]="$jobSubmission" \ + [printGeotiff]="$printGeotiff" \ + [stats]="$stats" \ + [quantiles]="$quantiles" \ [prefixStr]="$prefixStr" \ [cache]="$cache" \ - [ensemble]="$ensemble" \ ); @@ -259,41 +273,28 @@ call_processing_func () { # all processing script files must follow same input argument standard local scriptRun read -rd '' scriptRun <<- EOF - bash ${script} --geotiff-dir="${funcArgs[geotiffDir]}" --variable="${funcArgs[variables]}" --output-dir="${funcArgs[outputDir]}" --start-date="${funcArgs[startDate]}" --end-date="${funcArgs[endDate]}" --time-scale="${funcArgs[timeScale]}" --lat-lims="${funcArgs[latLims]}" --lon-lims="${funcArgs[lonLims]}" --prefix="${funcArgs[prefixStr]}" --cache="${funcArgs[cache]}" --ensemble="${funcArgs[ensemble]}" + bash ${script} --dataset-dir="${funcArgs[geotiffDir]}" --crs="${funcArgs[crs]}" --variable="${funcArgs[variables]}" --output-dir="${funcArgs[outputDir]}" --start-date="${funcArgs[startDate]}" --end-date="${funcArgs[endDate]}" --lat-lims="${funcArgs[latLims]}" --lon-lims="${funcArgs[lonLims]}" --shape-file="${funcArgs[shapefile]}" --print-geotiff="${funcArgs[printGeotiff]}" --stat="${funcArgs[stats]}" --quantile="${funcArgs[quantiles]}" --prefix="${funcArgs[prefixStr]}" --cache="${funcArgs[cache]}" EOF # evaluate the script file using the arguments provided if [[ "${funcArgs[jobSubmission]}" == true ]]; then - # chunk time-frame - chunk_dates "$chunkTStep" - local dateArrLen="$((${#startDateArr[@]}-1))" # or $endDateArr # Create a temporary directory for keeping job logs mkdir -p "$HOME/scratch/.gdt_logs" # SLURM batch file sbatch <<- EOF #!/bin/bash - #SBATCH --array=0-$dateArrLen #SBATCH --cpus-per-task=4 #SBATCH --nodes=1 #SBATCH --account=rpp-kshook #SBATCH --time=04:00:00 #SBATCH --mem=8GB #SBATCH --job-name=GWF_${scriptName} - #SBATCH --error=$HOME/scratch/.gdt_logs/GWF_%A-%a_err.txt - #SBATCH --output=$HOME/scratch/.gdt_logs/GWF_%A-%a.txt + #SBATCH --error=$HOME/scratch/.gdt_logs/GWF_%j_err.txt + #SBATCH --output=$HOME/scratch/.gdt_logs/GWF_%j.txt #SBATCH --mail-user=$email #SBATCH --mail-type=BEGIN,END,FAIL - $(declare -p startDateArr) - $(declare -p endDateArr) - tBegin="\${startDateArr[\${SLURM_ARRAY_TASK_ID}]}" - tEnd="\${endDateArr[\${SLURM_ARRAY_TASK_ID}]}" - - echo "${scriptName}.sh: 
#\${SLURM_ARRAY_TASK_ID} chunk submitted." - echo "${scriptName}.sh: Chunk start date is \$tBegin" - echo "${scriptName}.sh: Chunk end date is \$tEnd" - - srun ${scriptRun} --start-date="\$tBegin" --end-date="\$tEnd" --cache="${cache}-\${SLURM_ARRAY_JOB_ID}-\${SLURM_ARRAY_TASK_ID}" + srun ${scriptRun} --cache="${cache}-\${SLURM_JOB_ID}" EOF # echo message echo "$(basename $0): job submission details are printed under ${HOME}/scratch/.gdt_logs" @@ -308,7 +309,7 @@ call_processing_func () { # Checking Input GeoTIFF # ====================== -case "${dataset,,}" in +case "${geotiff,,}" in # MERIT Hydro "merithydro" | "merit hydro" | "merit-hydro" | "merit_hydro") call_processing_func "$(dirname $0)/merit_hydro/merit_hydro.sh" @@ -319,11 +320,6 @@ case "${dataset,,}" in call_processing_func "$(dirname $0)/soil_grids/soil_grids_v1.sh" ;; - # Soil Grids v2 - "soil-grids-v2" | "soilgridsv2" | "soil_grids_v2" | "soil grids v2") - call_processing_func "$(dirname $0)/soil_grids/soil_grids_v2.sh" - ;; - # MODIS "modis") call_processing_func "$(dirname $0)/modis/modis.sh" diff --git a/merit_hydro/.merit_hydro.sh.swp b/merit_hydro/.merit_hydro.sh.swp deleted file mode 100644 index d0c6961..0000000 Binary files a/merit_hydro/.merit_hydro.sh.swp and /dev/null differ diff --git a/merit_hydro/README.md b/merit_hydro/README.md new file mode 100644 index 0000000..6070ca9 --- /dev/null +++ b/merit_hydro/README.md @@ -0,0 +1,89 @@ +# `MERIT-Hydro` Geospatial Dataset +In this file, the necessary technical details of the dataset is explained. + +## Location of the `MERIT-Hydro` Dataset Files +The `MERIT-Hydro` geospatial dataset files are located under the following directory accessible from Digital Alliance (formerly Compute Canada) Graham cluster: + +```console +/project/rpp-kshook/CompHydCore/merit_hydro/raw_data +``` + +And the structure of the files is as following: +```console +/project/rpp-kshook/CompHydCore/merit_hydro/raw_data +├── elv +│ ├── elv_n00e000.tar +│ ├── elv_n00e030.tar +│ ├── . +│ ├── . +│ ├── . +│ └── elv_s60w180.tar +. +. +. +└── %var + ├── %var_n00e000.tar + ├── %var_n00e030.tar + ├── . + ├── . + ├── . + └── %var_s60w180.tar +``` +In each `.tar` file, there are multiple GeoTIFF (`.tif`) files that cover the extents indicated in the parent `.tar` file name. For example, the `elv_n00e000.tar` contains the following files: +```console +/path/to/elv_n00e000.tar/contents +├── n00e005_elv.tif +├── n00e010_elv.tif +├── n00e015_elv.tif +├── n00e020_elv.tif +├── n00e025_elv.tif +├── n05e000_elv.tif +├── n05e005_elv.tif +├── n05e010_elv.tif +├── n05e015_elv.tif +├── n05e020_elv.tif +├── n05e025_elv.tif +├── n10e000_elv.tif +├── n10e005_elv.tif +├── n10e010_elv.tif +├── n10e015_elv.tif +├── n10e020_elv.tif +├── n10e025_elv.tif +├── n15e000_elv.tif +├── n15e005_elv.tif +├── n15e010_elv.tif +├── n15e015_elv.tif +├── n15e020_elv.tif +├── n15e025_elv.tif +├── n20e000_elv.tif +├── n20e005_elv.tif +├── n20e010_elv.tif +├── n20e015_elv.tif +├── n20e020_elv.tif +├── n20e025_elv.tif +├── n25e000_elv.tif +├── n25e005_elv.tif +├── n25e010_elv.tif +├── n25e015_elv.tif +├── n25e020_elv.tif +└── n25e025_elv.tif +``` + +## Spatial and Temporal Extents + +The spatial extent of this dataset covers longitudes from `-180` to `+180` degress and latitudes from `-60` to `+90` degress. This dataset is static and does not vary with time. 
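+
+Because the archives are laid out in 30-degree blocks, the `.tar` file covering
+a given point can be located by flooring each coordinate to the nearest lower
+multiple of 30. The snippet below is a standalone sketch of this arithmetic; it
+mirrors the `merit_extent` helper inside `merit_hydro.sh` and is not itself
+part of the workflow:
+
+```bash
+#!/bin/bash
+# floor a coordinate to the origin of its enclosing 30-degree MERIT-Hydro block
+merit_block () { echo "x=$1; if (x<0 && x%30 != 0) x=x-30; (x/30)*30" | bc; }
+
+# a point at (49N, 117W) falls in the block whose origin is (30N, 120W),
+# so its adjusted-elevation archive is elv_n30w120.tar
+merit_block 49     # prints 30
+merit_block -117   # prints -120
+```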
+ +## Dataset Variables +This dataset has 6 main variables that are described in the following table: + +|# |Variable Name (used in `gistool`) |Description |Comments | +|-------|---------------------------------------|---------------------------------------|---------------| +|1 |`dir` |Flow Direction Map | | +|2 |`elv` |Adjusted Elevation | | +|3 |`upa` |Upstream Drainage Area | | +|4 |`upg` |Number of Upstream Drainage Pixels | | +|5 |`wth` |River Width | | +|6 |`hnd` |Height Above Nearest Drainage | | + +A description of all the variables included in this dataset is explained on the `MERIT-Hydro`'s [website](http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/). + diff --git a/merit_hydro/merit_hydro.sh b/merit_hydro/merit_hydro.sh index 3e41e81..ce74a56 100755 --- a/merit_hydro/merit_hydro.sh +++ b/merit_hydro/merit_hydro.sh @@ -1,9 +1,9 @@ #!/bin/bash -# GeoTIFF Processing Workflow +# GIS Data Processing Workflow # Copyright (C) 2022, University of Saskatchewan -# Copyright (C) 2021-22, Wouter Knoben +# Copyright (C) 2021, Wouter Knoben # -# This file is part of GeoTIFF Processing Workflow +# This file is part of GIS Data Processing Workflow # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,8 +23,11 @@ # ========================= # 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html # 2. General ideas of GeoTIFF subsetting are taken from https://github.com/CH-Earth/CWARHM -# developed mainly by Wouter Knoben (hence the header copyright credit). See the publication +# developed mainly by Wouter Knoben (hence the header copyright credit). See the preprint # at: https://www.essoar.org/doi/10.1002/essoar.10509195.1 +# 3. `merit_extent` function is adopted from: https://unix.stackexchange.com/a/168486/487495 +# and https://unix.stackexchange.com/a/385175/487495 + # ================ # General comments @@ -39,11 +42,12 @@ # Usage Functions # =============== short_usage() { - echo "usage: $(basename $0) [-io DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL]" + echo "usage: $(basename $0) -cio DIR -v var1[,var2[...]] [-r INT] [-se DATE] [-ln REAL,REAL] [-f PATH] [-t BOOL] [-a stat1[,stat2,[...]] [-q q1[,q2[...]]]] [-p STR] " } + # argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT -parsedArguments=$(getopt -a -n extract-dataset -o i:v:o:s:e:t:l:n:c:p: --long geotiff-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,cache:,prefix: -- "$@") +parsedArguments=$(getopt -a -n merit_hydro -o i:o:v:r:s:e:l:n:f:t:a:q:p:c: --long dataset-dir:,output-dir:,variable:,crs:,start-date:,end-date:,lat-lims:,lon-lims:,shape-file:,print-geotiff:,stat:,quantile:,prefix:,cache: -- "$@") validArguments=$? if [ "$validArguments" != "0" ]; then short_usage; @@ -52,7 +56,7 @@ fi # check if no options were passed if [ $# -eq 0 ]; then - echo "ERROR $(basename $0): arguments missing"; + echo "$(basename $0): ERROR! 
arguments missing";
  exit 1;
fi

@@ -61,38 +65,63 @@ eval set -- "$parsedArguments"
while :
do
  case "$1" in
-    -i | --geotiff-dir)   geotiffDir="$2"   ; shift 2 ;; # required
-    -v | --variables)     variables="$2"    ; shift 2 ;; # required
+    -i | --dataset-dir)   geotiffDir="$2"   ; shift 2 ;; # required
     -o | --output-dir)    outputDir="$2"    ; shift 2 ;; # required
-    -s | --start-date)    startDate="$2"    ; shift 2 ;; # redundant - added for compatibility
+    -v | --variable)      variables="$2"    ; shift 2 ;; # required
+    -r | --crs)           crs="$2"          ; shift 2 ;; # required
+    -s | --start-date)    startDate="$2"    ; shift 2 ;; # redundant - added for compatibility
     -e | --end-date)      endDate="$2"      ; shift 2 ;; # redundant - added for compatibility
-    -t | --time-scale)    timeScale="$2"    ; shift 2 ;; # required
-    -l | --lat-lims)      latLims="$2"      ; shift 2 ;; # required
-    -n | --lon-lims)      lonLims="$2"      ; shift 2 ;; # required
-    -c | --cache)         cacheDir="$2"     ; shift 2 ;; # required
-    -p | --prefix)        prefix="$2"       ; shift 2 ;; # required
+    -l | --lat-lims)      latLims="$2"      ; shift 2 ;; # required - could be redundant
+    -n | --lon-lims)      lonLims="$2"      ; shift 2 ;; # required - could be redundant
+    -f | --shape-file)    shapefile="$2"    ; shift 2 ;; # required - could be redundant
+    -t | --print-geotiff) printGeotiff="$2" ; shift 2 ;; # required
+    -a | --stat)          stats="$2"        ; shift 2 ;; # optional
+    -q | --quantile)      quantiles="$2"    ; shift 2 ;; # optional
+    -p | --prefix)        prefix="$2"       ; shift 2 ;; # optional
+    -c | --cache)         cache="$2"        ; shift 2 ;; # required

    # -- means the end of the arguments; drop this, and break out of the while loop
    --) shift; break ;;

    # in case of invalid option
    *)
-      echo "ERROR $(basename $0): invalid option '$1'";
+      echo "$(basename $0): ERROR! invalid option '$1'";
      short_usage;
      exit 1 ;;
  esac
done

-# check if dates are provided
-if [[ -n "$startDate" ]] && [[ -n "$endDate" ]]; then
-  echo "ERROR $(basename $0): redundant argument (date lims) provided";
+# check if redundant time extents are provided
+if [[ -n "$startDate" ]] || [[ -n "$endDate" ]]; then
+  echo "$(basename $0): ERROR!
redundant argument (time extents) provided"; exit 1; fi +# check the prefix if not set +if [[ -z $prefix ]]; then + prefix="merit_hydro_" +fi + +# parse comma-delimited variables +IFS=',' read -ra variables <<< "${variables}" + # ===================== # Necessary Assumptions # ===================== -# The structure of file names is as follows: "VAR_(N|S)(STARTDEG)(W|E)(STARTDEG)" (no file extension) +# TZ to be set to UTC to avoid invalid dates due to Daylight Saving +alias date='TZ=UTC date' +# expand aliases for the one stated above +shopt -s expand_aliases +# hard-coded paths +renvCache="/project/rpp-kshook/Climate_Forcing_Data/assets/r-envs/" # general renv cache path +exactextractrCache="${renvCache}/exact-extract-env" # exactextractr renv cache path +renvPackagePath="${renvCache}/renv_0.15.5.tar.gz" # renv_0.15.5 source path + + +# ========================== +# Necessary Global Variables +# ========================== +# the structure of the original file names is as follows: "%{var}_%{s or n}%{lat}%{w or e}%{lon}.tar" # =================== @@ -100,184 +129,255 @@ fi # =================== # Modules below available on Compute Canada (CC) Graham Cluster Server load_core_modules () { - module -q load gdal/3.0.4; + module -q purge + module -q load gcc/9.3.0 + module -q load r/4.1.2 + module -q load gdal/3.0.4 + module -q load udunits/2.2.28 + module -q load geos/3.10.2 + module -q load proj/9.0.0 } -load_core_modules # load necessary modules +load_core_modules -####################################### -# useful one-liners -####################################### +# ================= +# Useful One-liners +# ================= +# MERIT-Hydro specific latitude and longitude limits +merit_extent () { echo "define merit_extent_lat (x) \ + {if (x<0) { if (x%30 == 0) {return ((x/30)*30)} \ + else {return (((x/30)-1)*30) }} \ + else {return ((x/30)*30) } }; \ + merit_extent_lat($1)" | bc; } -# =============== -# Data Processing -# =============== -# display info -echo "$(basename $0): processing MERIT Hydro GeoTIFF..." 
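+# NOTE: the `bc` program inside `merit_extent` above floors a coordinate to
+# the origin of its enclosing 30-degree MERIT-Hydro block, compensating for
+# `bc`'s truncated (toward-zero) integer division on negative inputs; e.g.,
+# `merit_extent 49` yields 30 and `merit_extent -117` yields -120.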
+# sorting a comma-delimited string of real numbers +sort_comma_delimited () { IFS=',' read -ra arr <<< "$*"; echo ${arr[*]} | tr " " "\n" | sort -n | tr "\n" " "; } -# make the output directory -mkdir -p "$outputDir" # create output directory - -# constructing the range of years -startYear=$(date --date="$startDate" "+%Y") # start year (first folder) -endYear=$(date --date="$endDate" "+%Y") # end year (last folder) -yearsRange=$(seq $startYear $endYear) - -# constructing $toDate and $endDate in unix time EPOCH -toDate=$startDate -toDateUnix=$(date --date="$startDate" "+%s") # first date in unix EPOCH time -endDateUnix=$(date --date="$endDate" "+%s") # end date in unix EPOCH time - -# extract the associated indices corresponding to latLims and lonLims -module -q load ncl/6.6.2 -## min and max of latitude and longitude limits -minLat=$(echo $latLims | cut -d ',' -f 1) -maxLat=$(echo $latLims | cut -d ',' -f 2) -minLon=$(echo $lonLims | cut -d ',' -f 1) -maxLon=$(echo $lonLims | cut -d ',' -f 2) -## extract coord -coordIdx="$(ncl -nQ 'coord_file='\"$coordMainFile\" 'minlat='"$minLat" 'maxlat='"$maxLat" 'minlon='"$minLon" 'maxlon='"$maxLon" "$coordIdxScript")" -lonLimsIdx="$(echo $coordIdx | cut -d ' ' -f 1)" -latLimsIdx="$(echo $coordIdx | cut -d ' ' -f 2)" -module -q unload ncl/6.6.2 -load_core_modules +# MERIT-Hydro coordinate signs and digit style +lat_sign () { if (( $* < 0 )); then printf "s%02g\n" $(echo "$*" | tr -d '-'); else printf "n%02g\n" "$*"; fi; } +lon_sign () { if (( $* < 0 )); then printf "w%03g\n" $(echo "$*" | tr -d '-'); else printf "e%03g\n" "$*"; fi; } -# produce subsetted $coordFile as well -mkdir -p "$cacheDir" -coordFileSubset="${cacheDir}/coordFileSubset.nc" -# if subsetted coordinate file does not exist, make one -if [[ -f "$coordFileSubset" ]]; then - : -else - ncks -v "XLAT,XLONG" \ - -d "$latVar","$latLimsIdx" \ - -d "$lonVar","$lonLimsIdx" \ - "$coordEssFile" "$coordFileSubset" || true -fi +# log date format +logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; } -# for each year (folder) do the following calculations -for yr in $yearsRange; do - # creating a temporary directory for temporary files - echo "$(basename $0): creating cache files for year $yr in $cacheDir" - mkdir -p "$cacheDir/$yr" # making the directory +####################################### +# Parse latitute/longitude limits +# +# Globals: +# sortedArr: sorted ascendingly array +# of the input numbers +# +# Arguments: +# coordLims: comma-delimited string +# of real numbers +# +# Outputs: +# sequence of integers echoed from an +# array. 
+####################################### +parse_coord_lims () { + # local variables + local coordLims="$@" + local limsSeq + local limSorted + + # parsing the input string + IFS=' ' read -ra limsSorted <<< $(sort_comma_delimited "$coordLims") + # creating sequence of numbers + limSeq=($(seq $(merit_extent "${limsSorted[0]}") \ + 30 \ + $(merit_extent "${limsSorted[1]}")) \ + ) + # echoing the `limSeq` + echo "${limSeq[@]}" +} - # setting the end point, either the end of current year, or the $endDate - endOfCurrentYearUnix=$(date --date="$yr-01-01 +1year -1hour" "+%s") # last time-step of the current year - if [[ $endOfCurrentYearUnix -le $endDateUnix ]]; then - endPointUnix=$endOfCurrentYearUnix - else - endPointUnix=$endDateUnix - fi - # extract variables from the forcing data files - while [[ "$toDateUnix" -le "$endPointUnix" ]]; do - # date manipulations - toDateFormatted=$(date --date "$toDate" "+$format") # current timestamp formatted to conform to CONUSI naming convention +####################################### +# extract original MERIT-Hydro `tar`s +# for the latitude and longitude exten- +# ts of interest. +# +# Globals: +# None +# +# Arguments: +# sourceDir: the path to the +# sourceDir +# destDir: the path to the destinati- +# on +# var: the name of the MERIT-Hydro +# dataset variable; MUST be one +# of the following: elv;dir;hnd +# upa;upg;wth +# +# Outputs: +# untarred files produced under +# `$destDir` +####################################### +extract_merit_tar () { + # local variables + local sourceDir="$1" # source path + local destDir="$2" # destination path + local var="$3" # MERIT-Hydro variable + local lat # counter variable + local lon # counter variable + + # create sequence of latitude and longitude values + local latSeq=($(parse_coord_lims "$latLims")) + local lonSeq=($(parse_coord_lims "$lonLims")) + + # if rectangular subset is requested + for lat in "${latSeq[@]}"; do + for lon in "${lonSeq[@]}"; do + # strip-components to evade making + # folder separately for each tar file + tar --strip-components=1 -xf "${sourceDir}/${var}/${var}_$(lat_sign ${lat})$(lon_sign ${lon}).tar" -C "${destDir}" + done + done +} - # creating file name - file="${fileStruct}_${toDateFormatted}" # current file name - # extracting variables from the files and spatial subsetting - ncks -O -v "$variables" \ - -d "$latVar","$latLimsIdx" \ - -d "$lonVar","$lonLimsIdx" \ - "$datasetDir/$yr/$file" "$cacheDir/$yr/$file" & # extracting $variables - [ $( jobs | wc -l ) -ge $( nproc ) ] && wait +####################################### +# subset GeoTIFFs +# +# Globals: +# latLims: comma-delimited latitude +# limits +# lonLims: comma-delimited longitude +# limits +# +# Arguments: +# sourceVrt: source vrt file +# destPath: destionation path (inclu- +# ding file name) +# +# Outputs: +# one mosaiced (merged) GeoTIFF under +# the $destDir +####################################### +subset_geotiff () { + # local variables + local latMin + local latMax + local lonMin + local lonMax + local sortedLats + local sortedLons + # reading arguments + local sourceVrt="$1" + local destPath="$2" + + # extracting minimum and maximum of latitude and longitude respectively + ## latitude + sortedLats=($(sort_comma_delimited "$latLims")) + latMin="${sortedLats[0]}" + latMax="${sortedLats[1]}" + ## longitude + sortedLons=($(sort_comma_delimited "$lonLims")) + lonMin="${sortedLons[0]}" + lonMax="${sortedLons[1]}" + + # subset based on lat/lon - flush to disk at 500MB + GDAL_CACHEMAX=500 + gdal_translate --config GDAL_CACHEMAX 
500 \ + -co COMPRESS="DEFLATE" \ + -co BIGTIFF="YES" \ + -projwin $lonMin $latMax $lonMax $latMin "${sourceVrt}" "${destPath}" \ + > /dev/null; +} - # increment time-step by one unit - toDate=$(date --date "$toDate 1hour") # current time-step - toDateUnix=$(date --date="$toDate" "+%s") # current timestamp in unix EPOCH time - done - # wait to make sure the while loop is finished - wait +# =============== +# Data Processing +# =============== +# display info +echo "$(logDate)$(basename $0): processing MERIT-Hydro GeoTIFF(s)..." - # go to the next year if necessary - if [[ "$toDateUnix" == "$endOfCurrentYearUnix" ]]; then - toDate=$(date --date "$toDate 1hour") - fi +# make the cache directory +echo "$(logDate)$(basename $0): creating cache directory under $cache" +mkdir -p "$cache" - # make the output directory - mkdir -p "$outputDir/$yr/" - - # data files for the current year with extracted $variables - files=($cacheDir/$yr/*) - # sorting files to make sure the time-series is correct - IFS=$'\n' files=($(sort <<<"${files[*]}")); unset IFS - - # check the $timeScale variable - case "${timeScale,,}" in - - h) - # going through every hourly file - for f in "${files[@]}"; do - # extracting information - extract_file_info "$f" - # necessary NetCDF operations - generate_netcdf "${fileName}" "$fileNameDate" "$fileNameTime" "$cacheDir/$yr/" "$outputDir/$yr/" "$timeScale" - done - ;; - - d) - # construct the date arrays - populate_date_arrays - - # for each date (i.e., YYYY-MM-DD) - for d in "${uniqueDatesArr[@]}"; do - # find the index of the $timesArr corresponding to $d -> $idx - date_match_idx "$d" "1-3" "-" "${datesArr[@]}" - - # concatenate hourly netCDF files to daily file, i.e., already produces _cat.nc files - dailyFiles=($cacheDir/$yr/${fileStruct}_${d}*) - concat_files "${fileStruct}_${d}" "$cacheDir/$yr/" "${dailyFiles[@]}" - - # implement CDO/NCO operations - generate_netcdf "${fileStruct}_${d}" "$d" "${timesArr[$idx]}" "$cacheDir/$yr/" "$outputDir/$yr/" "$timeScale" - done - ;; - - m) - # construct the date arrays - populate_date_arrays - - # for each date (i.e., YYYY-MM-DD) - for m in "${uniqueMonthsArr[@]}"; do - # find the index of the $timesArr corresponding to $d -> $idx - # $m is in 'YYYY-MM' format - date_match_idx "$m" "1,2" "-" "${datesArr[@]}" - - # concatenate hourly netCDF files to monthly files, i.e., already produced *_cat.nc files - monthlyFiles=($cacheDir/$yr/${fileStruct}_${m}*) - concat_files "${fileStruct}_${m}" "$cacheDir/$yr/" "${monthlyFiles[@]}" - - # implement CDO/NCO operations - generate_netcdf "${fileStruct}_${m}" "${datesArr[$idx]}" "${timesArr[$idx]}" "$cacheDir/$yr/" "$outputDir/$yr/" "$timeScale" - done - ;; - - y) - # construct the date arrays - populate_date_arrays - - # find the index of the $timesArr and $datesArr corresponding to $d -> $idx - date_match_idx "$yr" "1" "-" "${datesArr[@]}" - - # concatenate hourly to yearly files - produced _cat.nc files - yearlyFiles=($cacheDir/$yr/${fileStruct}_${yr}*) - concat_files "${fileStruct}_${yr}" "$cacheDir/$yr/" "${yearlyFiles[@]}" - - # implement CDO/NCO operations - generate_netcdf "${fileStruct}_${yr}" "${datesArr[$idx]}" "${timesArr[$idx]}" "$cacheDir/$yr/" "$outputDir/$yr/" "$timeScale" - ;; +# make the output directory +echo "$(logDate)$(basename $0): creating output directory under $outputDir" +mkdir -p "$outputDir" # making the output directory + +# if shapefile is provided extract the extents from it +if [[ -n $shapefile ]]; then + # extract the shapefile extent + IFS=' ' read -ra shapefileExtents 
<<< "$(ogrinfo -so -al "$shapefile" | sed 's/[),(]//g' | grep Extent)"
+  # transform the extents in case they are not in EPSG:4326
+  IFS=':' read -ra sourceProj4 <<< "$(gdalsrsinfo $shapefile | grep -e "PROJ.4")" # source Proj4 value
+  if [[ -n $sourceProj4 ]]; then
+    :
+  else
+    echo "$(logDate)$(basename $0): WARNING! Assuming WGS84 CRS for the input ESRI shapefile"
+    sourceProj4=("PROJ.4" " +proj=longlat +datum=WGS84 +no_defs") # made an array for compatibility with the following statements
+  fi
+
+  # transform limits and assign to variables
+  IFS=' ' read -ra leftBottomLims <<< $(echo "${shapefileExtents[@]:1:2}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
+  IFS=' ' read -ra rightTopLims <<< $(echo "${shapefileExtents[@]:4:5}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
+  # define $latLims and $lonLims from $shapefileExtents
+  lonLims="${leftBottomLims[0]},${rightTopLims[0]}"
+  latLims="${leftBottomLims[1]},${rightTopLims[1]}"
+fi

+# untar MERIT-Hydro files and build .vrt file out of them
+# for each variable
+echo "$(logDate)$(basename $0): untarring MERIT-Hydro variables under $cache"
+for var in "${variables[@]}"; do
+  # create temporary directories for each variable
+  mkdir -p "$cache/$var"
+  # extract the `tar`s
+  extract_merit_tar "$geotiffDir" "${cache}/${var}" "$var"
+  # make .vrt out of each variable's GeoTIFFs
+  # ATTENTION: the second argument is deliberately left unquoted so the glob expands
+  gdalbuildvrt "${cache}/${var}.vrt" ${cache}/${var}/*.tif -resolution highest -sd 1 > /dev/null
+done

+# subset and produce stats if needed
+if [[ "$printGeotiff" == "true" ]]; then
+  echo "$(logDate)$(basename $0): subsetting GeoTIFFs under $outputDir"
+  for var in "${variables[@]}"; do
+    # subset based on lat and lon values
+    subset_geotiff "${cache}/${var}.vrt" "${outputDir}/${prefix}${var}.tif"
+  done
fi

+# extract zonal statistics if a shapefile and stats are provided
+if [[ -n "$shapefile" ]] && [[ -n $stats ]]; then
+  echo "$(logDate)$(basename $0): Extracting stats under $outputDir"
+  mkdir -p "$cache/r-virtual-env/"
+  ## make R renv in $cache
+  virtualEnvPath="$cache/r-virtual-env/"
+  cp "$(dirname $0)/../assets/renv.lock" "$virtualEnvPath"
+  ## load necessary modules - excessive, mainly for clarification
+  load_core_modules
+
+  # extract given stats for each variable
+  for var in "${variables[@]}"; do
+    ## build renv and create stats
+    Rscript "$(dirname $0)/../assets/stats.R" \
+    	    "$exactextractrCache" \
+    	    "$renvPackagePath" \
+    	    "$virtualEnvPath" \
+    	    "$virtualEnvPath" \
+    	    "${virtualEnvPath}/renv.lock" \
+    	    "${cache}/${var}.vrt" \
+    	    "$shapefile" \
+    	    "$outputDir/${prefix}stats_${var}.csv" \
+    	    "$stats" \
+    	    "$quantiles" >> "${outputDir}/${prefix}stats_${var}.log" 2>&1;
+  done
+fi
+
+# remove temporary files
+mkdir -p "$HOME/empty_dir"
+echo "$(logDate)$(basename $0): deleting temporary files from $cache"
+rsync --quiet -aP --delete "$HOME/empty_dir/" "$cache"
+rm -r "$cache"
+echo "$(logDate)$(basename $0): temporary files from $cache are removed"
+echo "$(logDate)$(basename $0): results are produced under $outputDir"
diff --git a/modis/README.md b/modis/README.md
new file mode 100644
index 0000000..f8efe4d
--- /dev/null
+++ b/modis/README.md
@@ -0,0 +1,78 @@
+# `MODIS`
Geospatial Dataset
+In this file, the necessary technical details of the dataset are explained. It is worth noting that `MODIS` has many products; only the products useful to the Canadian hydrological community are described here.
+
+## Location of the `MODIS` Dataset Files
+The `MODIS` geospatial dataset files are located under the following directory accessible from the Digital Research Alliance of Canada (formerly Compute Canada) Graham cluster:
+
+```console
+/project/rpp-kshook/Model_Output/MODIS
+```
+
+The structure of the files is as follows:
+
+```console
+/project/rpp-kshook/Model_Output/MODIS
+├── MCD12Q1.006
+│   ├── 2001.01.01
+│   │   ├── MCD12Q1.A2001001.h00v08.006.2018142182903.hdf
+│   │   ├── MCD12Q1.A2001001.h00v08.006.2018142182903.hdf.xml
+│   │   ├── MCD12Q1.A2001001.h00v09.006.2018142182901.hdf
+│   │   ├── MCD12Q1.A2001001.h00v09.006.2018142182901.hdf.xml
+│   │   ├── .
+│   │   ├── .
+│   │   ├── .
+│   │   ├── MCD12Q1.A2001001.h35v10.006.2018142183401.hdf
+│   │   └── MCD12Q1.A2001001.h35v10.006.2018142183401.hdf.xml
+│   ├── 2002.01.01
+│   │   ├── MCD12Q1.A2002001.h00v08.006.2018143043830.hdf
+│   │   ├── MCD12Q1.A2002001.h00v08.006.2018143043830.hdf.xml
+│   │   ├── MCD12Q1.A2002001.h00v09.006.2018143043923.hdf
+│   │   ├── MCD12Q1.A2002001.h00v09.006.2018143043923.hdf.xml
+│   │   ├── .
+│   │   ├── .
+│   │   ├── .
+│   │   ├── MCD12Q1.A2002001.h35v10.006.2018143070809.hdf
+│   │   └── MCD12Q1.A2002001.h35v10.006.2018143070809.hdf.xml
+│   .
+│   .
+│   .
+│   ├── %YYYY.01.01
+│   │   ├── MCD12Q1.A{%Y}001.h00v08.006.{%HASHNUM}.hdf
+│   │   ├── MCD12Q1.A{%Y}001.h00v08.006.{%HASHNUM}.hdf.xml
+│   │   .
+│   │   .
+│   │   .
+│   .
+│   .
+│   .
+│   └── 2020.01.01
+│       ├── MCD12Q1.A2020001.h00v08.006.2021361185603.hdf
+│       ├── MCD12Q1.A2020001.h00v08.006.2021361185603.hdf.xml
+│       ├── MCD12Q1.A2020001.h00v09.006.2021361185533.hdf
+│       ├── MCD12Q1.A2020001.h00v09.006.2021361185533.hdf.xml
+│       ├── .
+│       ├── .
+│       ├── .
+│       ├── MCD12Q1.A2020001.h35v10.006.2021361213521.hdf
+│       └── MCD12Q1.A2020001.h35v10.006.2021361213521.hdf.xml
+└── %var
+    ├── .
+    ├── .
+    ├── .
+    └── .
+```
+As the above file structure shows, the `MODIS` dataset repository currently includes only one variable (i.e., `MCD12Q1.006`). "The MCD12Q1 V6 product provides global land cover types at yearly intervals (2001-[2020]) derived from six different classification schemes. It is derived using supervised classifications of `MODIS` Terra and Aqua reflectance data. The supervised classifications then undergo additional post-processing that incorporate prior knowledge and ancillary information to further refine specific classes" [^reference].
+
+[^reference]: Google. (n.d.). MCD12Q1.006 MODIS land cover type yearly global 500M | Earth Engine Data catalog | Google developers. Google. Retrieved July 7, 2022, from https://developers.google.com/earth-engine/datasets/catalog/MODIS_006_MCD12Q1.
+
+## Spatial and Temporal Extents
+
+The spatial extent of this dataset covers longitudes from `-180` to `+180` degrees and latitudes from `-90` to `+90` degrees. As mentioned above, the `MCD12Q1.006` variable includes yearly land cover types from 2001 to 2020.
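+
+## Example Usage
+As a minimal sketch (the `--dataset` keyword, shapefile path, and output directory below are illustrative placeholders and may need adjustment for your own setup), the land cover variable could be processed with the main script of this repository as follows:
+
+```console
+foo@bar:~$ ./extract-gis.sh --dataset="modis" \
+             --dataset-dir="/project/rpp-kshook/Model_Output/MODIS" \
+             --variable="MCD12Q1.006" \
+             --start-date="2001-01-01" \
+             --end-date="2005-12-31" \
+             --shape-file="/path/to/example.shp" \
+             --print-geotiff=true \
+             --stat="majority,frac" \
+             --output-dir="$HOME/scratch/modis-test" \
+             --prefix="modis_test_";
+```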
+
+## Dataset Variables
+This dataset has one main variable, described in the following table:
+
+|# |Variable Name (used in `gistool`) |Description |Comments |
+|-------|---------------------------------------|---------------------------------------|---------------|
+|1 |`MCD12Q1.006` |Global land cover classes | |
+
diff --git a/modis/modis.sh b/modis/modis.sh
new file mode 100755
index 0000000..93ae28a
--- /dev/null
+++ b/modis/modis.sh
@@ -0,0 +1,322 @@
+#!/bin/bash
+# GIS Data Processing Workflow
+# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2021, Wouter Knoben
+#
+# This file is part of GIS Data Processing Workflow
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# =========================
+# Credits and contributions
+# =========================
+# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
+# 2. General ideas of GeoTIFF subsetting are taken from https://github.com/CH-Earth/CWARHM
+#    developed mainly by Wouter Knoben (hence the header copyright credit). See the preprint
+#    at: https://www.essoar.org/doi/10.1002/essoar.10509195.1
+
+
+# ================
+# General comments
+# ================
+# * All variables are camelCased to distinguish them from function names;
+# * function names are all in lower_case with words separated by underscores for legibility;
+# * shell style is based on the Google Open Source Projects'
+#   Style Guide: https://google.github.io/styleguide/shellguide.html
+
+
+# ===============
+# Usage Functions
+# ===============
+short_usage() {
+  echo "usage: $(basename $0) -cio DIR -v var1[,var2[...]] [-r INT] [-se DATE] [-ln REAL,REAL] [-f PATH] [-t BOOL] [-a stat1[,stat2,[...]] [-q q1[,q2[...]]]] [-p STR]"
+}
+
+
+# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
+parsedArguments=$(getopt -a -n modis -o i:o:v:r:s:e:l:n:f:t:a:q:p:c: --long dataset-dir:,output-dir:,variable:,crs:,start-date:,end-date:,lat-lims:,lon-lims:,shape-file:,print-geotiff:,stat:,quantile:,prefix:,cache: -- "$@")
+validArguments=$?
+if [ "$validArguments" != "0" ]; then
+  short_usage;
+  exit 1;
+fi
+
+# check if no options were passed
+if [ $# -eq 0 ]; then
+  echo "$(basename $0): ERROR!
arguments missing";
+  exit 1;
+fi
+
+# check long and short options passed
+eval set -- "$parsedArguments"
+while :
+do
+  case "$1" in
+    -i | --dataset-dir)    geotiffDir="$2"    ; shift 2 ;; # required
+    -o | --output-dir)     outputDir="$2"     ; shift 2 ;; # required
+    -v | --variable)       variables="$2"     ; shift 2 ;; # required
+    -r | --crs)            crs="$2"           ; shift 2 ;; # required
+    -s | --start-date)     startDate="$2"     ; shift 2 ;; # required
+    -e | --end-date)       endDate="$2"       ; shift 2 ;; # required
+    -l | --lat-lims)       latLims="$2"       ; shift 2 ;; # required - could be redundant
+    -n | --lon-lims)       lonLims="$2"       ; shift 2 ;; # required - could be redundant
+    -f | --shape-file)     shapefile="$2"     ; shift 2 ;; # required - could be redundant
+    -t | --print-geotiff)  printGeotiff="$2"  ; shift 2 ;; # required
+    -a | --stat)           stats="$2"         ; shift 2 ;; # optional
+    -q | --quantile)       quantiles="$2"     ; shift 2 ;; # optional
+    -p | --prefix)         prefix="$2"        ; shift 2 ;; # optional
+    -c | --cache)          cache="$2"         ; shift 2 ;; # required
+
+    # -- means the end of the arguments; drop this, and break out of the while loop
+    --) shift; break ;;
+
+    # in case of invalid option
+    *)
+      echo "$(basename $0): ERROR! invalid option '$1'";
+      short_usage; exit 1 ;;
+  esac
+done
+
+# check if the time extents are provided
+if [[ -z "$startDate" ]] || [[ -z "$endDate" ]]; then
+  echo "$(basename $0): WARNING! time extents missing, considering the full time range";
+  startDate="2001"
+  endDate="2020"
+fi
+
+# set the default prefix if not provided
+if [[ -z $prefix ]]; then
+  prefix="modis_"
+fi
+
+# parse comma-delimited variables
+IFS=',' read -ra variables <<< "${variables}"
+
+
+# =====================
+# Necessary Assumptions
+# =====================
+# TZ is set to UTC to avoid invalid dates due to Daylight Saving Time
+alias date='TZ=UTC date'
+# expand aliases for the one stated above
+shopt -s expand_aliases
+# hard-coded paths
+renvCache="/project/rpp-kshook/Climate_Forcing_Data/assets/r-envs/" # general renv cache path
+exactextractrCache="${renvCache}/exact-extract-env" # exactextractr renv cache path
+renvPackagePath="${renvCache}/renv_0.15.5.tar.gz" # renv_0.15.5 source path
+
+
+# ==========================
+# Necessary Global Variables
+# ==========================
+# None
+
+
+# ===================
+# Necessary Functions
+# ===================
+# Modules below are available on the Compute Canada (CC) Graham cluster
+load_core_modules () {
+  module -q purge
+  module -q load gcc/9.3.0
+  module -q load r/4.1.2
+  module -q load gdal/3.0.4
+  module -q load udunits/2.2.28
+  module -q load geos/3.10.2
+  module -q load proj/9.0.0
+}
+load_core_modules
+
+
+# =================
+# Useful One-liners
+# =================
+# sorting a comma-delimited string of real numbers
+sort_comma_delimited () { IFS=',' read -ra arr <<< "$*"; echo ${arr[*]} | tr " " "\n" | sort -n | tr "\n" " "; }
+
+# log date format
+logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
+
+
+#######################################
+# subset GeoTIFFs
+#
+# Globals:
+#   latLims: comma-delimited latitude
+#            limits
+#   lonLims: comma-delimited longitude
+#            limits
+#
+# Arguments:
+#   sourceVrt: source .vrt file (or .tif)
+#   destPath: destination path (including file name)
+#
+# Outputs:
+#   one mosaicked (merged) GeoTIFF under
+#   the $destPath
+#######################################
+subset_geotiff () {
+  # local variables
+  local latMin
+  local latMax
+  local lonMin
+  local lonMax
+  local sortedLats
+  local sortedLons
+  # reading arguments
+  local sourceVrt="$1"
+  local destPath="$2"
+
+  # extracting the minimum and maximum of latitude and longitude, respectively
+  ## latitude
+  sortedLats=($(sort_comma_delimited "$latLims"))
+  latMin="${sortedLats[0]}"
+  latMax="${sortedLats[1]}"
+  ## longitude
+  sortedLons=($(sort_comma_delimited "$lonLims"))
+  lonMin="${sortedLons[0]}"
+  lonMax="${sortedLons[1]}"
+
+  # subset based on lat/lon - flush to disk at 500MB
+  gdal_translate --config GDAL_CACHEMAX 500 \
+    -co COMPRESS="DEFLATE" \
+    -co BIGTIFF="YES" \
+    -projwin $lonMin $latMax $lonMax $latMin "${sourceVrt}" "${destPath}" \
+    > /dev/null;
+}
+
+
+# ===============
+# Data Processing
+# ===============
+# display info
+echo "$(logDate)$(basename $0): processing MODIS HDF(s)..."
+
+# make the cache directory
+echo "$(logDate)$(basename $0): creating cache directory under $cache"
+mkdir -p "$cache"
+
+# make the output directory
+echo "$(logDate)$(basename $0): creating output directory under $outputDir"
+mkdir -p "$outputDir"
+
+# extract the start and end years
+startYear="$(date --date="$startDate" +"%Y")"
+endYear="$(date --date="$endDate" +"%Y")"
+yearsRange=($(seq $startYear $endYear))
+
+# if a shapefile is provided, extract the extents from it
+if [[ -n $shapefile ]]; then
+  # extract the shapefile extent
+  IFS=' ' read -ra shapefileExtents <<< "$(ogrinfo -so -al "$shapefile" | sed 's/[),(]//g' | grep Extent)"
+  # transform the extents in case they are not in EPSG:4326
+  IFS=':' read -ra sourceProj4 <<< "$(gdalsrsinfo $shapefile | grep -e "PROJ.4")" # source Proj4 value
+  if [[ -z $sourceProj4 ]]; then
+    echo "$(logDate)$(basename $0): WARNING!
Assuming WGS84 CRS for the input ESRI shapefile"
+    sourceProj4=("PROJ.4" " +proj=longlat +datum=WGS84 +no_defs") # made an array for compatibility with the following statements
+  fi
+
+  # transform the limits and assign them to variables
+  IFS=' ' read -ra leftBottomLims <<< $(echo "${shapefileExtents[@]:1:2}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
+  IFS=' ' read -ra rightTopLims <<< $(echo "${shapefileExtents[@]:4:5}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
+  # define $latLims and $lonLims from $shapefileExtents
+  lonLims="${leftBottomLims[0]},${rightTopLims[0]}"
+  latLims="${leftBottomLims[1]},${rightTopLims[1]}"
+fi
+
+# build a .vrt file out of the annual MODIS HDFs for each of the $variables
+echo "$(logDate)$(basename $0): building virtual format (.vrt) of MODIS HDFs under $cache"
+for var in "${variables[@]}"; do
+  for yr in "${yearsRange[@]}"; do
+    # format year to conform to the MODIS nomenclature
+    yrFormatted="${yr}.01.01"
+
+    # create temporary directories for each variable
+    mkdir -p "${cache}/${var}"
+
+    # make .vrt out of each variable's HDFs
+    # ATTENTION: the second argument is not enclosed in quotation marks
+    gdalbuildvrt "${cache}/${var}/${yr}.vrt" ${geotiffDir}/${var}/${yrFormatted}/*.hdf -resolution highest -sd 1 > /dev/null
+
+    # reproject .vrt to the chosen EPSG:$crs projection (EPSG:4326 by default)
+    gdalwarp -of VRT -t_srs "EPSG:$crs" "${cache}/${var}/${yr}.vrt" "${cache}/${var}/${yr}_${crs}.vrt" > /dev/null
+  done
+done
+
+# subset GeoTIFFs based on the given spatial extents
+echo "$(logDate)$(basename $0): subsetting HDFs in GeoTIFF format under $outputDir"
+# for each given variable
+for var in "${variables[@]}"; do
+  mkdir -p "${outputDir}/${var}"
+  # for each year
+  for yr in "${yearsRange[@]}"; do
+    # subset based on lat and lon values
+    if [[ "$printGeotiff" == "true" ]]; then
+      subset_geotiff "${cache}/${var}/${yr}_${crs}.vrt" "${outputDir}/${var}/${prefix}${yr}.tif"
+    elif [[ "$printGeotiff" == "false" ]]; then
+      subset_geotiff "${cache}/${var}/${yr}_${crs}.vrt" "${cache}/${var}/${prefix}${yr}.tif"
+    fi
+  done
+done
+
+## make R renv project directory
+if [[ -n "$shapefile" ]] && [[ -n $stats ]]; then
+  echo "$(logDate)$(basename $0): Extracting stats under $outputDir"
+  mkdir -p "$cache/r-virtual-env/"
+  ## make R renv in $cache
+  virtualEnvPath="$cache/r-virtual-env/"
+  cp "$(dirname $0)/../assets/renv.lock" "$virtualEnvPath"
+  ## load necessary modules - excessive, mainly for clarification
+  load_core_modules
+
+  # extract given stats for each variable
+  for var in "${variables[@]}"; do
+    for yr in "${yearsRange[@]}"; do
+      # raster file path based on the $printGeotiff value
+      if [[ "$printGeotiff" == "true" ]]; then
+        rasterPath="${outputDir}/${var}/${prefix}${yr}.tif"
+      elif [[ "$printGeotiff" == "false" ]]; then
+        rasterPath="${cache}/${var}/${prefix}${yr}.tif"
+      fi
+
+      ## build renv and create stats
+      Rscript "$(dirname $0)/../assets/stats.R" \
+        "$exactextractrCache" \
+        "$renvPackagePath" \
+        "$virtualEnvPath" \
+        "$virtualEnvPath" \
+        "${virtualEnvPath}/renv.lock" \
+        "$rasterPath" \
+        "$shapefile" \
+        "$outputDir/${var}/${prefix}stats_${var}_${yr}.csv" \
+        "$stats" \
+        "$quantiles" >> "${outputDir}/${var}/${prefix}stats_${var}_${yr}.log" 2>&1;
+    done
+  done
+fi
+
+# remove temporary files
+mkdir "$HOME/empty_dir"
+echo "$(logDate)$(basename $0): deleting temporary files from $cache"
+rsync --quiet -aP --delete "$HOME/empty_dir/" "$cache"
+rm -r "$cache"
+echo "$(logDate)$(basename $0): temporary files from $cache are removed"
+echo
"$(logDate)$(basename $0): results are produced under $outputDir" + diff --git a/soil_grids/README.md b/soil_grids/README.md new file mode 100644 index 0000000..09e89a2 --- /dev/null +++ b/soil_grids/README.md @@ -0,0 +1,394 @@ +# `Soil Grids V1` Geospatial Dataset +In this file, the necessary technical details of the dataset is explained. + +## Location of the `Soil Grids V1` Dataset Files +The `Soil Grids V1` geospatial dataset files are located under the following directory accessible from Digital Alliance (formerly Compute Canada) Graham cluster: + +```console +/project/rpp-kshook/Model_Output/SoilGridsV1/soilgrids/former/2017-03-10/data +``` + +And the structure of the files is as following: + +```console +/project/rpp-kshook/Model_Output/SoilGridsV1/soilgrids/former/2017-03-10/data +├── ACDWRB_M_ss_250m_ll.tif +├── ACDWRB_M_ss_250m_ll.tif.xml +├── AWCh1_M_sl1_250m_ll.tif +├── AWCh1_M_sl1_250m_ll.tif.xml +├── AWCh1_M_sl2_250m_ll.tif +├── AWCh1_M_sl2_250m_ll.tif.xml +├── %var_M_sl{%depth_level}_250m_ll.tif +├── %var_M_sl{%depth_level}_250m_ll.tif.xml +├── . +├── . +├── . +├── AWCh1_M_sl7_250m_ll.tif +├── AWCh1_M_sl7_250m_ll.tif.xml +├── . +├── . +├── . +├── SNDPPT_M_sl7_250m_ll.tif +├── SNDPPT_M_sl7_250m_ll.tif.xml +├── WWP_M_sl7_250m_ll.tif +├── WWP_M_sl7_250m_ll.tif.xml +├── . +├── . +├── . +├── TAXNWRB_250m_ll.tif +├── TAXNWRB_250m_ll.tif.xml +├── TAXNWRB_Acric.Ferralsols_250m_ll.tif +├── TAXNWRB_Acric.Ferralsols_250m_ll.tif.xml +├── TAXNWRB_Acric.Plinthosols_250m_ll.tif +├── TAXNWRB_Acric.Plinthosols_250m_ll.tif.xml +├── . +├── . +├── . +├── TAXNWRB_%{short_group_name}_250m_ll.tif +├── TAXNWRB_%{short_group_name}_250m_ll.tif.xml +├── . +├── . +├── . +├── TAXNWRB_Vitric.Cryosols_250m_ll.tif +├── TAXNWRB_Vitric.Cryosols_250m_ll.tif.xml +├── TAXOUSDA_250m_ll.tif +├── TAXOUSDA_250m_ll.tif.xml +├── TAXOUSDA_Albolls_250m_ll.tif +├── TAXOUSDA_Albolls_250m_ll.tif.xml +├── . +├── . +├── . +├── TAXOUSDA_%{group_name}_250m_ll.tif +├── TAXOUSDA_${group_name}_250m_ll.tif.xml +├── . +├── . +├── . +├── TAXOUSDA_Xerults_250m_ll.tif +└── TAXOUSDA_Xerults_250m_ll.tif.xml +``` + +## Spatial and Temporal Extents + +The spatial extent of this dataset covers longitudes from `-180` to `+180` degress and latitudes from `-90` to `+90` degress. This dataset is static and does not vary with time. 
+
+## Dataset Variables
+The variables of this dataset are detailed in the table below:
+
+|# |Variable Name (used in `gistool`) |Description |Comments |
+|-------|---------------------------------------|---------------------------------------|---------------|
+|1 |BDRICM_M_250m_ll |Depth to bedrock (R horizon) up to 200 cm | |
+|2 |BDRLOG_M_250m_ll |Probability of occurrence of R horizon | |
+|3 |BDTICM_M_250m_ll |Absolute depth to bedrock (in cm) | |
+|4 |BLDFIE_M_sl1_250m_ll |Bulk density (fine earth) in kg / cubic-meter | |
+|5 |BLDFIE_M_sl2_250m_ll |Bulk density (fine earth) in kg / cubic-meter | |
+|6 |BLDFIE_M_sl3_250m_ll |Bulk density (fine earth) in kg / cubic-meter | |
+|7 |BLDFIE_M_sl4_250m_ll |Bulk density (fine earth) in kg / cubic-meter | |
+|8 |BLDFIE_M_sl5_250m_ll |Bulk density (fine earth) in kg / cubic-meter | |
+|9 |BLDFIE_M_sl6_250m_ll |Bulk density (fine earth) in kg / cubic-meter | |
+|10 |BLDFIE_M_sl7_250m_ll |Bulk density (fine earth) in kg / cubic-meter | |
+|11 |CECSOL_M_sl1_250m_ll |Cation exchange capacity of soil in cmolc/kg | |
+|12 |CECSOL_M_sl2_250m_ll |Cation exchange capacity of soil in cmolc/kg | |
+|13 |CECSOL_M_sl3_250m_ll |Cation exchange capacity of soil in cmolc/kg | |
+|14 |CECSOL_M_sl4_250m_ll |Cation exchange capacity of soil in cmolc/kg | |
+|15 |CECSOL_M_sl5_250m_ll |Cation exchange capacity of soil in cmolc/kg | |
+|16 |CECSOL_M_sl6_250m_ll |Cation exchange capacity of soil in cmolc/kg | |
+|17 |CECSOL_M_sl7_250m_ll |Cation exchange capacity of soil in cmolc/kg | |
+|18 |CLYPPT_M_sl1_250m_ll |Clay content (0-2 micro meter) mass fraction in % | |
+|19 |CLYPPT_M_sl2_250m_ll |Clay content (0-2 micro meter) mass fraction in % | |
+|20 |CLYPPT_M_sl3_250m_ll |Clay content (0-2 micro meter) mass fraction in % | |
+|21 |CLYPPT_M_sl4_250m_ll |Clay content (0-2 micro meter) mass fraction in % | |
+|22 |CLYPPT_M_sl5_250m_ll |Clay content (0-2 micro meter) mass fraction in % | |
+|23 |CLYPPT_M_sl6_250m_ll |Clay content (0-2 micro meter) mass fraction in % | |
+|24 |CLYPPT_M_sl7_250m_ll |Clay content (0-2 micro meter) mass fraction in % | |
+|25 |CRFVOL_M_sl1_250m_ll |Coarse fragments volumetric in % | |
+|26 |CRFVOL_M_sl2_250m_ll |Coarse fragments volumetric in % | |
+|27 |CRFVOL_M_sl3_250m_ll |Coarse fragments volumetric in % | |
+|28 |CRFVOL_M_sl4_250m_ll |Coarse fragments volumetric in % | |
+|29 |CRFVOL_M_sl5_250m_ll |Coarse fragments volumetric in % | |
+|30 |CRFVOL_M_sl6_250m_ll |Coarse fragments volumetric in % | |
+|31 |CRFVOL_M_sl7_250m_ll |Coarse fragments volumetric in % | |
+|32 |OCSTHA_M_sd1_250m_ll |Soil organic carbon stock in tons per ha | |
+|33 |OCSTHA_M_sd2_250m_ll |Soil organic carbon stock in tons per ha | |
+|34 |OCSTHA_M_sd3_250m_ll |Soil organic carbon stock in tons per ha | |
+|35 |OCSTHA_M_sd4_250m_ll |Soil organic carbon stock in tons per ha | |
+|36 |OCSTHA_M_sd5_250m_ll |Soil organic carbon stock in tons per ha | |
+|37 |OCSTHA_M_sd6_250m_ll |Soil organic carbon stock in tons per ha | |
+|38 |OCSTHA_M_30cm_250m_ll |Soil organic carbon stock in tons per ha | |
+|39 |OCSTHA_M_100cm_250m_ll |Soil organic carbon stock in tons per ha | |
+|40 |OCSTHA_M_200cm_250m_ll |Soil organic carbon stock in tons per ha | |
+|41 |OCDENS_M_sl1_250m_ll |Soil organic carbon density in kg per cubic-m | |
+|42 |OCDENS_M_sl2_250m_ll |Soil organic carbon density in kg per cubic-m | |
+|43 |OCDENS_M_sl3_250m_ll |Soil organic carbon density in kg per cubic-m | |
+|44 |OCDENS_M_sl4_250m_ll |Soil organic carbon density in kg per cubic-m | |
+|45
|OCDENS_M_sl5_250m_ll |Soil organic carbon density in kg per cubic-m | | +|46 |OCDENS_M_sl6_250m_ll |Soil organic carbon density in kg per cubic-m | | +|47 |OCDENS_M_sl7_250m_ll |Soil organic carbon density in kg per cubic-m | | +|48 |ORCDRC_M_sl1_250m_ll |Soil organic carbon content (fine earth fraction) in g per kg | | +|49 |ORCDRC_M_sl2_250m_ll |Soil organic carbon content (fine earth fraction) in g per kg | | +|50 |ORCDRC_M_sl3_250m_ll |Soil organic carbon content (fine earth fraction) in g per kg | | +|51 |ORCDRC_M_sl4_250m_ll |Soil organic carbon content (fine earth fraction) in g per kg | | +|52 |ORCDRC_M_sl5_250m_ll |Soil organic carbon content (fine earth fraction) in g per kg | | +|53 |ORCDRC_M_sl6_250m_ll |Soil organic carbon content (fine earth fraction) in g per kg | | +|54 |ORCDRC_M_sl7_250m_ll |Soil organic carbon content (fine earth fraction) in g per kg | | +|55 |PHIHOX_M_sl1_250m_ll |Soil pH x 10 in H2O | | +|56 |PHIHOX_M_sl2_250m_ll |Soil pH x 10 in H2O | | +|57 |PHIHOX_M_sl3_250m_ll |Soil pH x 10 in H2O | | +|58 |PHIHOX_M_sl4_250m_ll |Soil pH x 10 in H2O | | +|59 |PHIHOX_M_sl5_250m_ll |Soil pH x 10 in H2O | | +|60 |PHIHOX_M_sl6_250m_ll |Soil pH x 10 in H2O | | +|61 |PHIHOX_M_sl7_250m_ll |Soil pH x 10 in H2O | | +|62 |PHIKCL_M_sl1_250m_ll |Soil pH x 10 in KCl | | +|63 |PHIKCL_M_sl2_250m_ll |Soil pH x 10 in KCl | | +|64 |PHIKCL_M_sl3_250m_ll |Soil pH x 10 in KCl | | +|65 |PHIKCL_M_sl4_250m_ll |Soil pH x 10 in KCl | | +|66 |PHIKCL_M_sl5_250m_ll |Soil pH x 10 in KCl | | +|67 |PHIKCL_M_sl6_250m_ll |Soil pH x 10 in KCl | | +|68 |PHIKCL_M_sl7_250m_ll |Soil pH x 10 in KCl | | +|69 |SLTPPT_M_sl1_250m_ll |Silt content (2-50 micro meter) mass fraction in % | | +|70 |SLTPPT_M_sl2_250m_ll |Silt content (2-50 micro meter) mass fraction in % | | +|71 |SLTPPT_M_sl3_250m_ll |Silt content (2-50 micro meter) mass fraction in % | | +|72 |SLTPPT_M_sl4_250m_ll |Silt content (2-50 micro meter) mass fraction in % | | +|73 |SLTPPT_M_sl5_250m_ll |Silt content (2-50 micro meter) mass fraction in % | | +|74 |SLTPPT_M_sl6_250m_ll |Silt content (2-50 micro meter) mass fraction in % | | +|75 |SLTPPT_M_sl7_250m_ll |Silt content (2-50 micro meter) mass fraction in % | | +|76 |SNDPPT_M_sl1_250m_ll |Sand content (50-2000 micro meter) mass fraction in % | | +|77 |SNDPPT_M_sl2_250m_ll |Sand content (50-2000 micro meter) mass fraction in % | | +|78 |SNDPPT_M_sl3_250m_ll |Sand content (50-2000 micro meter) mass fraction in % | | +|79 |SNDPPT_M_sl4_250m_ll |Sand content (50-2000 micro meter) mass fraction in % | | +|80 |SNDPPT_M_sl5_250m_ll |Sand content (50-2000 micro meter) mass fraction in % | | +|81 |SNDPPT_M_sl6_250m_ll |Sand content (50-2000 micro meter) mass fraction in % | | +|82 |SNDPPT_M_sl7_250m_ll |Sand content (50-2000 micro meter) mass fraction in % | | +|83 |TEXMHT_M_sl1_250m_ll |Texture class (USDA system) | | +|84 |TEXMHT_M_sl2_250m_ll |Texture class (USDA system) | | +|85 |TEXMHT_M_sl3_250m_ll |Texture class (USDA system) | | +|86 |TEXMHT_M_sl4_250m_ll |Texture class (USDA system) | | +|87 |TEXMHT_M_sl5_250m_ll |Texture class (USDA system) | | +|88 |TEXMHT_M_sl6_250m_ll |Texture class (USDA system) | | +|89 |TEXMHT_M_sl7_250m_ll |Texture class (USDA system) | | +|90 |TAXNWRB_250m_ll |WRB 2006 class | | +|91 |TAXNWRB_Acric.Ferralsols_250m_ll |WRB 2006 class | | +|92 |TAXNWRB_Acric.Plinthosols_250m_ll |WRB 2006 class | | +|93 |TAXNWRB_Albic.Arenosols_250m_ll |WRB 2006 class | | +|94 |TAXNWRB_Albic.Luvisols_250m_ll |WRB 2006 class | | +|95 |TAXNWRB_Alic.Nitisols_250m_ll |WRB 2006 class | | +|96 
|TAXNWRB_Aluandic.Andosols_250m_ll |WRB 2006 class | | +|97 |TAXNWRB_Aric.Regosols_250m_ll |WRB 2006 class | | +|98 |TAXNWRB_Calcaric.Regosols_250m_ll |WRB 2006 class | | +|99 |TAXNWRB_Calcic.Chernozems_250m_ll |WRB 2006 class | | +|100 |TAXNWRB_Calcic.Gleysols_250m_ll |WRB 2006 class | | +|101 |TAXNWRB_Calcic.Gypsisols_250m_ll |WRB 2006 class | | +|102 |TAXNWRB_Calcic.Histosols_250m_ll |WRB 2006 class | | +|103 |TAXNWRB_Calcic.Kastanozems_250m_ll |WRB 2006 class | | +|104 |TAXNWRB_Calcic.Luvisols_250m_ll |WRB 2006 class | | +|105 |TAXNWRB_Calcic.Solonetz_250m_ll |WRB 2006 class | | +|106 |TAXNWRB_Calcic.Vertisols_250m_ll |WRB 2006 class | | +|107 |TAXNWRB_Cryic.Histosols_250m_ll |WRB 2006 class | | +|108 |TAXNWRB_Cutanic.Alisols_250m_ll |WRB 2006 class | | +|109 |TAXNWRB_Endogleyic.Cambisols_250m_ll |WRB 2006 class | | +|110 |TAXNWRB_Endogleyic.Planosols_250m_ll |WRB 2006 class | | +|111 |TAXNWRB_Ferralic.Arenosols_250m_ll |WRB 2006 class | | +|112 |TAXNWRB_Ferralic.Cambisols_250m_ll |WRB 2006 class | | +|113 |TAXNWRB_Fibric.Histosols_250m_ll |WRB 2006 class | | +|114 |TAXNWRB_Gleyic.Luvisols_250m_ll |WRB 2006 class | | +|115 |TAXNWRB_Gleyic.Podzols_250m_ll |WRB 2006 class | | +|116 |TAXNWRB_Gleyic.Solonetz_250m_ll |WRB 2006 class | | +|117 |TAXNWRB_Gypsic.Solonchaks_250m_ll |WRB 2006 class | | +|118 |TAXNWRB_Haplic.Acrisols_250m_ll |WRB 2006 class | | +|119 |TAXNWRB_Haplic.Acrisols..Alumic._250m_ll |WRB 2006 class | | +|120 |TAXNWRB_Haplic.Acrisols..Ferric._250m_ll |WRB 2006 class | | +|121 |TAXNWRB_Haplic.Acrisols..Humic._250m_ll |WRB 2006 class | | +|122 |TAXNWRB_Haplic.Albeluvisols_250m_ll |WRB 2006 class | | +|123 |TAXNWRB_Haplic.Alisols_250m_ll |WRB 2006 class | | +|124 |TAXNWRB_Haplic.Andosols_250m_ll |WRB 2006 class | | +|125 |TAXNWRB_Haplic.Arenosols_250m_ll |WRB 2006 class | | +|126 |TAXNWRB_Haplic.Arenosols..Calcaric._250m_ll |WRB 2006 class | | +|127 |TAXNWRB_Haplic.Calcisols_250m_ll |WRB 2006 class | | +|128 |TAXNWRB_Haplic.Calcisols..Sodic._250m_ll |WRB 2006 class | | +|129 |TAXNWRB_Haplic.Cambisols_250m_ll |WRB 2006 class | | +|130 |TAXNWRB_Haplic.Cambisols..Calcaric._250m_ll |WRB 2006 class | | +|131 |TAXNWRB_Haplic.Cambisols..Chromic._250m_ll |WRB 2006 class | | +|132 |TAXNWRB_Haplic.Cambisols..Dystric._250m_ll |WRB 2006 class | | +|133 |TAXNWRB_Haplic.Cambisols..Eutric._250m_ll |WRB 2006 class | | +|134 |TAXNWRB_Haplic.Cambisols..Humic._250m_ll |WRB 2006 class | | +|135 |TAXNWRB_Haplic.Cambisols..Sodic._250m_ll |WRB 2006 class | | +|136 |TAXNWRB_Haplic.Chernozems_250m_ll |WRB 2006 class | | +|137 |TAXNWRB_Haplic.Cryosols_250m_ll |WRB 2006 class | | +|138 |TAXNWRB_Haplic.Ferralsols_250m_ll |WRB 2006 class | | +|139 |TAXNWRB_Haplic.Ferralsols..Rhodic._250m_ll |WRB 2006 class | | +|140 |TAXNWRB_Haplic.Ferralsols..Xanthic._250m_ll |WRB 2006 class | | +|141 |TAXNWRB_Haplic.Fluvisols_250m_ll |WRB 2006 class | | +|142 |TAXNWRB_Haplic.Fluvisols..Arenic._250m_ll |WRB 2006 class | | +|143 |TAXNWRB_Haplic.Fluvisols..Calcaric._250m_ll |WRB 2006 class | | +|144 |TAXNWRB_Haplic.Fluvisols..Dystric._250m_ll |WRB 2006 class | | +|145 |TAXNWRB_Haplic.Fluvisols..Eutric._250m_ll |WRB 2006 class | | +|146 |TAXNWRB_Haplic.Gleysols_250m_ll |WRB 2006 class | | +|147 |TAXNWRB_Haplic.Gleysols..Dystric._250m_ll |WRB 2006 class | | +|148 |TAXNWRB_Haplic.Gleysols..Eutric._250m_ll |WRB 2006 class | | +|149 |TAXNWRB_Haplic.Gypsisols_250m_ll |WRB 2006 class | | +|150 |TAXNWRB_Haplic.Kastanozems_250m_ll |WRB 2006 class | | +|151 |TAXNWRB_Haplic.Leptosols_250m_ll |WRB 2006 class | | +|152 
|TAXNWRB_Haplic.Leptosols..Eutric._250m_ll |WRB 2006 class | | +|153 |TAXNWRB_Haplic.Lixisols_250m_ll |WRB 2006 class | | +|154 |TAXNWRB_Haplic.Lixisols..Chromic._250m_ll |WRB 2006 class | | +|155 |TAXNWRB_Haplic.Lixisols..Ferric._250m_ll |WRB 2006 class | | +|156 |TAXNWRB_Haplic.Luvisols_250m_ll |WRB 2006 class | | +|157 |TAXNWRB_Haplic.Luvisols..Chromic._250m_ll |WRB 2006 class | | +|158 |TAXNWRB_Haplic.Luvisols..Ferric._250m_ll |WRB 2006 class | | +|159 |TAXNWRB_Haplic.Nitisols..Rhodic._250m_ll |WRB 2006 class | | +|160 |TAXNWRB_Haplic.Phaeozems_250m_ll |WRB 2006 class | | +|161 |TAXNWRB_Haplic.Planosols..Dystric._250m_ll |WRB 2006 class | | +|162 |TAXNWRB_Haplic.Planosols..Eutric._250m_ll |WRB 2006 class | | +|163 |TAXNWRB_Haplic.Podzols_250m_ll |WRB 2006 class | | +|164 |TAXNWRB_Haplic.Regosols..Dystric._250m_ll |WRB 2006 class | | +|165 |TAXNWRB_Haplic.Regosols..Eutric._250m_ll |WRB 2006 class | | +|166 |TAXNWRB_Haplic.Regosols..Sodic._250m_ll |WRB 2006 class | | +|167 |TAXNWRB_Haplic.Solonchaks_250m_ll |WRB 2006 class | | +|168 |TAXNWRB_Haplic.Solonchaks..Sodic._250m_ll |WRB 2006 class | | +|169 |TAXNWRB_Haplic.Solonetz_250m_ll |WRB 2006 class | | +|170 |TAXNWRB_Haplic.Umbrisols_250m_ll |WRB 2006 class | | +|171 |TAXNWRB_Haplic.Vertisols_250m_ll |WRB 2006 class | | +|172 |TAXNWRB_Haplic.Vertisols..Eutric._250m_ll |WRB 2006 class | | +|173 |TAXNWRB_Hemic.Histosols_250m_ll |WRB 2006 class | | +|174 |TAXNWRB_Histic.Albeluvisols_250m_ll |WRB 2006 class | | +|175 |TAXNWRB_Hypoluvic.Arenosols_250m_ll |WRB 2006 class | | +|176 |TAXNWRB_Leptic.Cambisols_250m_ll |WRB 2006 class | | +|177 |TAXNWRB_Leptic.Luvisols_250m_ll |WRB 2006 class | | +|178 |TAXNWRB_Leptic.Phaeozems_250m_ll |WRB 2006 class | | +|179 |TAXNWRB_Leptic.Regosols_250m_ll |WRB 2006 class | | +|180 |TAXNWRB_Leptic.Umbrisols_250m_ll |WRB 2006 class | | +|181 |TAXNWRB_Lithic.Leptosols_250m_ll |WRB 2006 class | | +|182 |TAXNWRB_Lixic.Plinthosols_250m_ll |WRB 2006 class | | +|183 |TAXNWRB_Luvic.Calcisols_250m_ll |WRB 2006 class | | +|184 |TAXNWRB_Luvic.Chernozems_250m_ll |WRB 2006 class | | +|185 |TAXNWRB_Luvic.Phaeozems_250m_ll |WRB 2006 class | | +|186 |TAXNWRB_Luvic.Planosols_250m_ll |WRB 2006 class | | +|187 |TAXNWRB_Luvic.Stagnosols_250m_ll |WRB 2006 class | | +|188 |TAXNWRB_Mollic.Gleysols_250m_ll |WRB 2006 class | | +|189 |TAXNWRB_Mollic.Leptosols_250m_ll |WRB 2006 class | | +|190 |TAXNWRB_Mollic.Solonetz_250m_ll |WRB 2006 class | | +|191 |TAXNWRB_Mollic.Vertisols_250m_ll |WRB 2006 class | | +|192 |TAXNWRB_Petric.Calcisols_250m_ll |WRB 2006 class | | +|193 |TAXNWRB_Petric.Durisols_250m_ll |WRB 2006 class | | +|194 |TAXNWRB_Plinthic.Acrisols_250m_ll |WRB 2006 class | | +|195 |TAXNWRB_Protic.Arenosols_250m_ll |WRB 2006 class | | +|196 |TAXNWRB_Rendzic.Leptosols_250m_ll |WRB 2006 class | | +|197 |TAXNWRB_Sapric.Histosols_250m_ll |WRB 2006 class | | +|198 |TAXNWRB_Solodic.Planosols_250m_ll |WRB 2006 class | | +|199 |TAXNWRB_Stagnic.Luvisols_250m_ll |WRB 2006 class | | +|200 |TAXNWRB_Turbic.Cryosols_250m_ll |WRB 2006 class | | +|201 |TAXNWRB_Umbric.Albeluvisols_250m_ll |WRB 2006 class | | +|202 |TAXNWRB_Umbric.Ferralsols_250m_ll |WRB 2006 class | | +|203 |TAXNWRB_Umbric.Gleysols_250m_ll |WRB 2006 class | | +|204 |TAXNWRB_Vertic.Cambisols_250m_ll |WRB 2006 class | | +|205 |TAXNWRB_Vertic.Luvisols_250m_ll |WRB 2006 class | | +|206 |TAXNWRB_Vetic.Acrisols_250m_ll |WRB 2006 class | | +|207 |TAXNWRB_Vitric.Andosols_250m_ll |WRB 2006 class | | +|208 |TAXNWRB_Vitric.Cryosols_250m_ll |WRB 2006 class | | +|209 |TAXOUSDA_250m_ll |USDA 2014 
class | | +|210 |TAXOUSDA_Albolls_250m_ll |USDA 2014 class | | +|211 |TAXOUSDA_Aqualfs_250m_ll |USDA 2014 class | | +|212 |TAXOUSDA_Aquands_250m_ll |USDA 2014 class | | +|213 |TAXOUSDA_Aquents_250m_ll |USDA 2014 class | | +|214 |TAXOUSDA_Aquepts_250m_ll |USDA 2014 class | | +|215 |TAXOUSDA_Aquerts_250m_ll |USDA 2014 class | | +|216 |TAXOUSDA_Aquods_250m_ll |USDA 2014 class | | +|217 |TAXOUSDA_Aquolls_250m_ll |USDA 2014 class | | +|218 |TAXOUSDA_Aquox_250m_ll |USDA 2014 class | | +|219 |TAXOUSDA_Aquults_250m_ll |USDA 2014 class | | +|220 |TAXOUSDA_Arents_250m_ll |USDA 2014 class | | +|221 |TAXOUSDA_Argids_250m_ll |USDA 2014 class | | +|222 |TAXOUSDA_Borolls_250m_ll |USDA 2014 class | | +|223 |TAXOUSDA_Calcids_250m_ll |USDA 2014 class | | +|224 |TAXOUSDA_Cambids_250m_ll |USDA 2014 class | | +|225 |TAXOUSDA_Cryalfs_250m_ll |USDA 2014 class | | +|226 |TAXOUSDA_Cryands_250m_ll |USDA 2014 class | | +|227 |TAXOUSDA_Cryepts_250m_ll |USDA 2014 class | | +|228 |TAXOUSDA_Cryids_250m_ll |USDA 2014 class | | +|229 |TAXOUSDA_Cryods_250m_ll |USDA 2014 class | | +|230 |TAXOUSDA_Cryolls_250m_ll |USDA 2014 class | | +|231 |TAXOUSDA_Durids_250m_ll |USDA 2014 class | | +|232 |TAXOUSDA_Fibrists_250m_ll |USDA 2014 class | | +|233 |TAXOUSDA_Fluvents_250m_ll |USDA 2014 class | | +|234 |TAXOUSDA_Folists_250m_ll |USDA 2014 class | | +|235 |TAXOUSDA_Gelands_250m_ll |USDA 2014 class | | +|236 |TAXOUSDA_Gelepts_250m_ll |USDA 2014 class | | +|237 |TAXOUSDA_Gelods_250m_ll |USDA 2014 class | | +|238 |TAXOUSDA_Gypsids_250m_ll |USDA 2014 class | | +|239 |TAXOUSDA_Hemists_250m_ll |USDA 2014 class | | +|240 |TAXOUSDA_Histels_250m_ll |USDA 2014 class | | +|241 |TAXOUSDA_Humods_250m_ll |USDA 2014 class | | +|242 |TAXOUSDA_Humults_250m_ll |USDA 2014 class | | +|243 |TAXOUSDA_Ochrepts_250m_ll |USDA 2014 class | | +|244 |TAXOUSDA_Orthels_250m_ll |USDA 2014 class | | +|245 |TAXOUSDA_Orthents_250m_ll |USDA 2014 class | | +|246 |TAXOUSDA_Orthods_250m_ll |USDA 2014 class | | +|247 |TAXOUSDA_Perox_250m_ll |USDA 2014 class | | +|248 |TAXOUSDA_Psamments_250m_ll |USDA 2014 class | | +|249 |TAXOUSDA_Rendolls_250m_ll |USDA 2014 class | | +|250 |TAXOUSDA_Salids_250m_ll |USDA 2014 class | | +|251 |TAXOUSDA_Saprists_250m_ll |USDA 2014 class | | +|252 |TAXOUSDA_Torrands_250m_ll |USDA 2014 class | | +|253 |TAXOUSDA_Torrerts_250m_ll |USDA 2014 class | | +|254 |TAXOUSDA_Torrox_250m_ll |USDA 2014 class | | +|255 |TAXOUSDA_Turbels_250m_ll |USDA 2014 class | | +|256 |TAXOUSDA_Udalfs_250m_ll |USDA 2014 class | | +|257 |TAXOUSDA_Udands_250m_ll |USDA 2014 class | | +|258 |TAXOUSDA_Udepts_250m_ll |USDA 2014 class | | +|259 |TAXOUSDA_Uderts_250m_ll |USDA 2014 class | | +|260 |TAXOUSDA_Udolls_250m_ll |USDA 2014 class | | +|261 |TAXOUSDA_Udox_250m_ll |USDA 2014 class | | +|262 |TAXOUSDA_Udults_250m_ll |USDA 2014 class | | +|263 |TAXOUSDA_Ustalfs_250m_ll |USDA 2014 class | | +|264 |TAXOUSDA_Ustands_250m_ll |USDA 2014 class | | +|265 |TAXOUSDA_Ustepts_250m_ll |USDA 2014 class | | +|266 |TAXOUSDA_Usterts_250m_ll |USDA 2014 class | | +|267 |TAXOUSDA_Ustolls_250m_ll |USDA 2014 class | | +|268 |TAXOUSDA_Ustox_250m_ll |USDA 2014 class | | +|269 |TAXOUSDA_Ustults_250m_ll |USDA 2014 class | | +|270 |TAXOUSDA_Vitrands_250m_ll |USDA 2014 class | | +|271 |TAXOUSDA_Xeralfs_250m_ll |USDA 2014 class | | +|272 |TAXOUSDA_Xerands_250m_ll |USDA 2014 class | | +|273 |TAXOUSDA_Xerepts_250m_ll |USDA 2014 class | | +|274 |TAXOUSDA_Xererts_250m_ll |USDA 2014 class | | +|275 |TAXOUSDA_Xerolls_250m_ll |USDA 2014 class | | +|276 |TAXOUSDA_Xerults_250m_ll |USDA 2014 class | | +|277 
|AWCh1_M_sl1_250m_ll |Available soil water capacity (volumetric fraction) for h1 | |
+|278 |AWCh1_M_sl2_250m_ll |Available soil water capacity (volumetric fraction) for h1 | |
+|279 |AWCh1_M_sl3_250m_ll |Available soil water capacity (volumetric fraction) for h1 | |
+|280 |AWCh1_M_sl4_250m_ll |Available soil water capacity (volumetric fraction) for h1 | |
+|281 |AWCh1_M_sl5_250m_ll |Available soil water capacity (volumetric fraction) for h1 | |
+|282 |AWCh1_M_sl6_250m_ll |Available soil water capacity (volumetric fraction) for h1 | |
+|283 |AWCh1_M_sl7_250m_ll |Available soil water capacity (volumetric fraction) for h1 | |
+|284 |AWCh2_M_sl1_250m_ll |Available soil water capacity (volumetric fraction) for h2 | |
+|285 |AWCh2_M_sl2_250m_ll |Available soil water capacity (volumetric fraction) for h2 | |
+|286 |AWCh2_M_sl3_250m_ll |Available soil water capacity (volumetric fraction) for h2 | |
+|287 |AWCh2_M_sl4_250m_ll |Available soil water capacity (volumetric fraction) for h2 | |
+|288 |AWCh2_M_sl5_250m_ll |Available soil water capacity (volumetric fraction) for h2 | |
+|289 |AWCh2_M_sl6_250m_ll |Available soil water capacity (volumetric fraction) for h2 | |
+|290 |AWCh2_M_sl7_250m_ll |Available soil water capacity (volumetric fraction) for h2 | |
+|291 |AWCh3_M_sl1_250m_ll |Available soil water capacity (volumetric fraction) for h3 | |
+|292 |AWCh3_M_sl2_250m_ll |Available soil water capacity (volumetric fraction) for h3 | |
+|293 |AWCh3_M_sl3_250m_ll |Available soil water capacity (volumetric fraction) for h3 | |
+|294 |AWCh3_M_sl4_250m_ll |Available soil water capacity (volumetric fraction) for h3 | |
+|295 |AWCh3_M_sl5_250m_ll |Available soil water capacity (volumetric fraction) for h3 | |
+|296 |AWCh3_M_sl6_250m_ll |Available soil water capacity (volumetric fraction) for h3 | |
+|297 |AWCh3_M_sl7_250m_ll |Available soil water capacity (volumetric fraction) for h3 | |
+|298 |WWP_M_sl1_250m_ll |Available soil water capacity (volumetric fraction) until wilting point| |
+|299 |WWP_M_sl2_250m_ll |Available soil water capacity (volumetric fraction) until wilting point| |
+|300 |WWP_M_sl3_250m_ll |Available soil water capacity (volumetric fraction) until wilting point| |
+|301 |WWP_M_sl4_250m_ll |Available soil water capacity (volumetric fraction) until wilting point| |
+|302 |WWP_M_sl5_250m_ll |Available soil water capacity (volumetric fraction) until wilting point| |
+|303 |WWP_M_sl6_250m_ll |Available soil water capacity (volumetric fraction) until wilting point| |
+|304 |WWP_M_sl7_250m_ll |Available soil water capacity (volumetric fraction) until wilting point| |
+|305 |AWCtS_M_sl1_250m_ll |Saturated water content (volumetric fraction) for tS | |
+|306 |AWCtS_M_sl2_250m_ll |Saturated water content (volumetric fraction) for tS | |
+|307 |AWCtS_M_sl3_250m_ll |Saturated water content (volumetric fraction) for tS | |
+|308 |AWCtS_M_sl4_250m_ll |Saturated water content (volumetric fraction) for tS | |
+|309 |AWCtS_M_sl5_250m_ll |Saturated water content (volumetric fraction) for tS | |
+|310 |AWCtS_M_sl6_250m_ll |Saturated water content (volumetric fraction) for tS | |
+|311 |HISTPR_250m_ll |Histosols probability cumulative | |
+|312 |HISTPR_250m_ll |Histosols probability cumulative | |
+|313 |SLGWRB_250m_ll |Sodic soil grade | |
+|314 |ACDWRB_M_ss_250m_ll |Acid sub-soils grade | |
+
+Apart from the table above, a complete description of the variables is found in the following files available on the `Soil Grids V1` hosting website: 1)
[`META_GEOTIFF_1B.csv`](https://files.isric.org/soilgrids/former/2017-03-10/data/META_GEOTIFF_1B.csv) that details all the files included in the dataset, 2) [`TAXNWRB_250m_ll.tif.csv`](https://files.isric.org/soilgrids/former/2017-03-10/data/TAXNWRB_250m_ll.tif.csv), and 3) [`TAXOUSDA_250m_ll.tif.csv`](https://files.isric.org/soilgrids/former/2017-03-10/data/TAXOUSDA_250m_ll.tif.csv).

diff --git a/soil_grids/soil_grids_v1.sh b/soil_grids/soil_grids_v1.sh
new file mode 100755
index 0000000..6a0ab2a
--- /dev/null
+++ b/soil_grids/soil_grids_v1.sh
@@ -0,0 +1,279 @@
+#!/bin/bash
+# GIS Data Processing Workflow
+# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2021, Wouter Knoben
+#
+# This file is part of GIS Data Processing Workflow
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# =========================
+# Credits and contributions
+# =========================
+# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
+# 2. General ideas of GeoTIFF subsetting are taken from https://github.com/CH-Earth/CWARHM
+#    developed mainly by Wouter Knoben (hence the header copyright credit). See the preprint
+#    at: https://www.essoar.org/doi/10.1002/essoar.10509195.1
+
+
+# ================
+# General comments
+# ================
+# * All variables are camelCased to distinguish them from function names;
+# * function names are all in lower_case with words separated by underscores for legibility;
+# * shell style is based on the Google Open Source Projects'
+#   Style Guide: https://google.github.io/styleguide/shellguide.html
+
+
+# ===============
+# Usage Functions
+# ===============
+short_usage() {
+  echo "usage: $(basename $0) -cio DIR -v var1[,var2[...]] [-r INT] [-se DATE] [-ln REAL,REAL] [-f PATH] [-t BOOL] [-a stat1[,stat2,[...]] [-q q1[,q2[...]]]] [-p STR]"
+}
+
+
+# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
+parsedArguments=$(getopt -a -n soil-grids-v1 -o i:o:v:r:s:e:l:n:f:t:a:q:p:c: --long dataset-dir:,output-dir:,variable:,crs:,start-date:,end-date:,lat-lims:,lon-lims:,shape-file:,print-geotiff:,stat:,quantile:,prefix:,cache: -- "$@")
+validArguments=$?
+if [ "$validArguments" != "0" ]; then
+  short_usage;
+  exit 1;
+fi
+
+# check if no options were passed
+if [ $# -eq 0 ]; then
+  echo "$(basename $0): ERROR!
arguments missing";
+  exit 1;
+fi
+
+# check long and short options passed
+eval set -- "$parsedArguments"
+while :
+do
+  case "$1" in
+    -i | --dataset-dir)    geotiffDir="$2"    ; shift 2 ;; # required
+    -o | --output-dir)     outputDir="$2"     ; shift 2 ;; # required
+    -v | --variable)       variables="$2"     ; shift 2 ;; # required
+    -r | --crs)            crs="$2"           ; shift 2 ;; # required
+    -s | --start-date)     startDate="$2"     ; shift 2 ;; # redundant - added for compatibility
+    -e | --end-date)       endDate="$2"       ; shift 2 ;; # redundant - added for compatibility
+    -l | --lat-lims)       latLims="$2"       ; shift 2 ;; # required - could be redundant
+    -n | --lon-lims)       lonLims="$2"       ; shift 2 ;; # required - could be redundant
+    -f | --shape-file)     shapefile="$2"     ; shift 2 ;; # required - could be redundant
+    -t | --print-geotiff)  printGeotiff="$2"  ; shift 2 ;; # required
+    -a | --stat)           stats="$2"         ; shift 2 ;; # optional
+    -q | --quantile)       quantiles="$2"     ; shift 2 ;; # optional
+    -p | --prefix)         prefix="$2"        ; shift 2 ;; # optional
+    -c | --cache)          cache="$2"         ; shift 2 ;; # required
+
+    # -- means the end of the arguments; drop this, and break out of the while loop
+    --) shift; break ;;
+
+    # in case of invalid option
+    *)
+      echo "$(basename $0): ERROR! invalid option '$1'";
+      short_usage; exit 1 ;;
+  esac
+done
+
+# check that redundant time extents are not provided
+if [[ -n "$startDate" ]] || [[ -n "$endDate" ]]; then
+  echo "$(basename $0): ERROR! redundant argument (time extents) provided";
+  exit 1;
+fi
+
+# set the default prefix if not provided
+if [[ -z $prefix ]]; then
+  prefix="soil_grids_v1_"
+fi
+
+# parse comma-delimited variables
+IFS=',' read -ra variables <<< "${variables}"
+
+
+# =====================
+# Necessary Assumptions
+# =====================
+# TZ is set to UTC to avoid invalid dates due to Daylight Saving Time
+alias date='TZ=UTC date'
+# expand aliases for the one stated above
+shopt -s expand_aliases
+# hard-coded paths
+renvCache="/project/rpp-kshook/Climate_Forcing_Data/assets/r-envs/" # general renv cache path
+exactextractrCache="${renvCache}/exact-extract-env" # exactextractr renv cache path
+renvPackagePath="${renvCache}/renv_0.15.5.tar.gz" # renv_0.15.5 source path
+
+
+# ==========================
+# Necessary Global Variables
+# ==========================
+# the structure of the original file names is one of the following:
+#   * %{var}_M_sl%{soil_layer_depth_number}_250m_ll.tif
+#   * %{var}_250m_ll.tif
+# but the user is expected to include the complete variable (file) name in the
+# `--variable` input argument's comma-separated list.
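+
+# as an optional sanity check (a minimal sketch; the workflow below does not
+# depend on it), verify that a GeoTIFF exists for every requested variable
+for var in "${variables[@]}"; do
+  if [[ ! -f "${geotiffDir}/${var}.tif" ]]; then
+    echo "$(basename $0): ERROR! ${geotiffDir}/${var}.tif was not found";
+    exit 1;
+  fi
+done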
+
+
+# ===================
+# Necessary Functions
+# ===================
+# Modules below are available on the Compute Canada (CC) Graham cluster
+load_core_modules () {
+  module -q purge
+  module -q load gcc/9.3.0
+  module -q load r/4.1.2
+  module -q load gdal/3.0.4
+  module -q load udunits/2.2.28
+  module -q load geos/3.10.2
+  module -q load proj/9.0.0
+}
+load_core_modules
+
+
+# =================
+# Useful One-liners
+# =================
+# sorting a comma-delimited string of real numbers
+sort_comma_delimited () { IFS=',' read -ra arr <<< "$*"; echo ${arr[*]} | tr " " "\n" | sort -n | tr "\n" " "; }
+
+# log date format
+logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
+
+
+#######################################
+# subset GeoTIFFs
+#
+# Globals:
+#   latLims: comma-delimited latitude
+#            limits
+#   lonLims: comma-delimited longitude
+#            limits
+#
+# Arguments:
+#   sourceVrt: source .vrt file
+#   destPath: destination path (including file name)
+#
+# Outputs:
+#   one mosaicked (merged) GeoTIFF under
+#   the $destPath
+#######################################
+subset_geotiff () {
+  # local variables
+  local latMin
+  local latMax
+  local lonMin
+  local lonMax
+  local sortedLats
+  local sortedLons
+  # reading arguments
+  local sourceVrt="$1"
+  local destPath="$2"
+
+  # extracting the minimum and maximum of latitude and longitude, respectively
+  ## latitude
+  sortedLats=($(sort_comma_delimited "$latLims"))
+  latMin="${sortedLats[0]}"
+  latMax="${sortedLats[1]}"
+  ## longitude
+  sortedLons=($(sort_comma_delimited "$lonLims"))
+  lonMin="${sortedLons[0]}"
+  lonMax="${sortedLons[1]}"
+
+  # subset based on lat/lon - flush to disk at 500MB
+  gdal_translate --config GDAL_CACHEMAX 500 \
+    -co COMPRESS="DEFLATE" \
+    -co BIGTIFF="YES" \
+    -projwin $lonMin $latMax $lonMax $latMin "${sourceVrt}" "${destPath}" \
+    > /dev/null;
+}
+
+
+# ===============
+# Data Processing
+# ===============
+# display info
+echo "$(logDate)$(basename $0): processing Soil-Grids-v1 GeoTIFF(s)..."
+
+# make the output directory
+echo "$(logDate)$(basename $0): creating output directory under $outputDir"
+mkdir -p "$outputDir"
+
+# if a shapefile is provided, extract the extents from it
+if [[ -n $shapefile ]]; then
+  # extract the shapefile extent
+  IFS=' ' read -ra shapefileExtents <<< "$(ogrinfo -so -al "$shapefile" | sed 's/[),(]//g' | grep Extent)"
+  # transform the extents in case they are not in EPSG:4326
+  IFS=':' read -ra sourceProj4 <<< "$(gdalsrsinfo $shapefile | grep -e "PROJ.4")" # source Proj4 value
+  if [[ -z $sourceProj4 ]]; then
+    echo "$(logDate)$(basename $0): WARNING!
Assuming WGS84 CRS for the input ESRI shapefile"
+    sourceProj4=("PROJ.4" " +proj=longlat +datum=WGS84 +no_defs") # made an array for compatibility with the following statements
+  fi
+
+  # transform the limits and assign them to variables
+  IFS=' ' read -ra leftBottomLims <<< $(echo "${shapefileExtents[@]:1:2}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
+  IFS=' ' read -ra rightTopLims <<< $(echo "${shapefileExtents[@]:4:5}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
+  # define $latLims and $lonLims from $shapefileExtents
+  lonLims="${leftBottomLims[0]},${rightTopLims[0]}"
+  latLims="${leftBottomLims[1]},${rightTopLims[1]}"
+fi
+
+# subset GeoTIFFs if requested
+if [[ "$printGeotiff" == "true" ]]; then
+  echo "$(logDate)$(basename $0): subsetting GeoTIFFs under $outputDir"
+  for var in "${variables[@]}"; do
+    # subset based on lat and lon values
+    subset_geotiff "${geotiffDir}/${var}.tif" "${outputDir}/${prefix}${var}.tif"
+  done
+fi
+
+## make R renv project directory
+if [[ -n "$shapefile" ]] && [[ -n $stats ]]; then
+  echo "$(logDate)$(basename $0): Extracting stats under $outputDir"
+  mkdir -p "$cache/r-virtual-env/"
+  ## make R renv in $cache
+  virtualEnvPath="$cache/r-virtual-env/"
+  cp "$(dirname $0)/../assets/renv.lock" "$virtualEnvPath"
+  ## load necessary modules - excessive, mainly for clarification
+  load_core_modules
+
+  # extract given stats for each variable
+  for var in "${variables[@]}"; do
+    ## build renv and create stats
+    Rscript "$(dirname $0)/../assets/stats.R" \
+      "$exactextractrCache" \
+      "$renvPackagePath" \
+      "$virtualEnvPath" \
+      "$virtualEnvPath" \
+      "${virtualEnvPath}/renv.lock" \
+      "${geotiffDir}/${var}.tif" \
+      "$shapefile" \
+      "$outputDir/${prefix}stats_${var}.csv" \
+      "$stats" \
+      "$quantiles" >> "${outputDir}/${prefix}stats_${var}.log" 2>&1;
+  done
+fi
+
+# remove unnecessary files
+mkdir "$HOME/empty_dir"
+echo "$(logDate)$(basename $0): deleting temporary files from $cache"
+rsync --quiet -aP --delete "$HOME/empty_dir/" "$cache"
+rm -r "$cache"
+echo "$(logDate)$(basename $0): temporary files from $cache are removed"
+echo "$(logDate)$(basename $0): results are produced under $outputDir"