From 3c6f819f9fccda09c2f8f390cefde04d119f98ce Mon Sep 17 00:00:00 2001 From: Saleh Mamun Date: Fri, 15 Nov 2024 16:58:59 -0600 Subject: [PATCH 1/7] One time use functionality These are one time use functions to prepare global level input dataset for various models. A typical user does not need to use these functions as these inputs are in the base data folder --- scripts/oneoffs/get_crop_production_data.py | 24 +++ scripts/oneoffs/ind_sdr_sm.yaml | 27 +++ scripts/oneoffs/inject_manage_output.py | 90 +++++++++ .../oneoffs/inject_manage_output_simple.py | 188 ++++++++++++++++++ scripts/oneoffs/run_es_models.py | 185 +++++++++++++++++ scripts/oneoffs/stitch_inputs.py | 66 ++++++ 6 files changed, 580 insertions(+) create mode 100644 scripts/oneoffs/get_crop_production_data.py create mode 100644 scripts/oneoffs/ind_sdr_sm.yaml create mode 100644 scripts/oneoffs/inject_manage_output.py create mode 100644 scripts/oneoffs/inject_manage_output_simple.py create mode 100644 scripts/oneoffs/run_es_models.py create mode 100644 scripts/oneoffs/stitch_inputs.py diff --git a/scripts/oneoffs/get_crop_production_data.py b/scripts/oneoffs/get_crop_production_data.py new file mode 100644 index 0000000..e99ffeb --- /dev/null +++ b/scripts/oneoffs/get_crop_production_data.py @@ -0,0 +1,24 @@ +import os +import xarray as xr +import rioxarray as rio + +path = 'C:/Users/salmamun/Files/base_data/crops/earthstat/22491997/CROPGRIDSv1.08_NC_maps/CROPGRIDSv1.08_NC_maps' +output_path = "C:/Users/salmamun/Files/base_data/crops/earthstat/crop_production" +files = os.listdir(path) +crops = [f.split(".")[1].split("_")[1] for f in files if f.endswith('.nc') and f!='Countries_2018.nc'] + +for cr in crops: + crop_nc_data = xr.open_dataset(os.path.join(path, f'CROPGRIDSv1.08_{cr}.nc')) + crop_band = crop_nc_data['qual'] + crop_band = crop_band.rio.set_spatial_dims(x_dim='lon', y_dim='lat') + crop_band.rio.write_crs("epsg:4326", inplace=True) + output_path_cr = os.path.join(output_path, cr) + if not 
os.path.exists(output_path_cr): + os.makedirs(output_path_cr) + crop_band.rio.to_raster(os.path.join(output_path_cr, f"{cr}_Production.tif")) + +# print(crop_nc_data.variables.keys()) + + + + diff --git a/scripts/oneoffs/ind_sdr_sm.yaml b/scripts/oneoffs/ind_sdr_sm.yaml new file mode 100644 index 0000000..b8e80e0 --- /dev/null +++ b/scripts/oneoffs/ind_sdr_sm.yaml @@ -0,0 +1,27 @@ +workspace_dir: "D:/MANAGE/SDRResults/output_dem_3s" +"results_suffix": "base" + +# The following are the raster needed for models +lulc_path: "D:/MANAGE/CommonInputs/RasterInputs//lulc_7755.tif" +dem_path: "D:/MANAGE/CommonInputs/RasterInputs/dem_con_7755.tif" +precipitation_raster_path: "D:/MANAGE/CommonInputs/RasterInputs/precipitation_annual_7755.tif" +erodibility_path: "D:/MANAGE/CommonInputs/RasterInputs/k_factor_7755.tif" +erosivity_path: "D:/MANAGE/CommonInputs/RasterInputs/rainfall_erosivity_7755.tif" + +# The following are the vector needed for models +watersheds_path: "D:/MANAGE/CommonInputs/watersheds/watersheds_7755_multipart.shp" + +# The following are the parameters tables +biophysical_table_path: "D:/MANAGE/CommonInputs/BiophysicalTables/sdr_parameter_table_onil.csv" + +# The following are specific parameters for the models +threshold_flow_accumulation: 75 # Taken from Onil - 75 +k_param: 2 +sdr_max: 0.8 # DEFAULT_SDR_SDR_MAX +ic_0_param: 0.5 # DEFAULT_SDR_IC_0_PARAM +l_max: 122 # DEFAULT_SDR_L_MAX +drainage_path: '' # This parameter is optional + +# Resource management parameters +n_workers: 10 # Provide the number of cores to use. 
+ diff --git a/scripts/oneoffs/inject_manage_output.py b/scripts/oneoffs/inject_manage_output.py new file mode 100644 index 0000000..45b08cd --- /dev/null +++ b/scripts/oneoffs/inject_manage_output.py @@ -0,0 +1,90 @@ +import os +import geopandas as gpd +import numpy as np +import pandas as pd +import rasterio as rio +from hazelbean import rasterize_to_match +from pygeoprocessing import zonal_statistics + +SEAL7_MANAGE_LANDUSE_DICT = { + 'Shrubland': 5, + 'Pastureland': 3, + 'SavnGrassLnd': 3, + 'cropLand': 2, + 'Builtupland': 1, + 'Forest': 4, + 'unManagedForest': 4, + 'Otherland': 5} + +iso3 = "TZA" +workspace = "C:/Users/salmamun/Files/seals/projects/manage" +aez_vector_file = "G:/Shared drives/NatCapTEEMs/Projects/WB_MANAGE_Project/GTAPLULCv10adminaez/GTAPLULCv10adminaez.shp" +project_aoi = os.path.join(workspace, "intermediate", "project_aoi", f"aoi_{iso3}.gpkg") +area_ha = os.path.join(workspace, "intermediate", "project_aoi", "pyramids", "aoi_ha_per_cell_fine.tif") +seal7_lulc = os.path.join(workspace, "intermediate", "fine_processed_inputs", "lulc", "esa", "seals7", "lulc_esa_seals7_2017.tif") + +regional_change_csv = "G:/Shared drives/NatCapTEEMs/Projects/WB_MANAGE_Project/toInvest.csv" + + + + +def reformat_aez(regional_change_csv): + regional_change_df = pd.read_csv(regional_change_csv) + regional_change_df.columns = ["GTAP141", "AEZ", "landuse", "year", "change_percent"] + for col in ["GTAP141", "AEZ", "landuse"]: + regional_change_df[col] = regional_change_df[col].str.replace("'", '') + regional_change_df['AEZ'] = regional_change_df['AEZ'].str.replace("f-AEZ", '') + return regional_change_df + + + +def get_total_by_aez_lu(regional_change_df, aez_vector_file, area_ha, seal7_lulc): + aez = gpd.read_file(aez_vector_file) + aez_iso = aez[aez['GTAP141'] == iso3] + aez_iso = aez_iso.to_crs("EPSG:4326") + aez_iso_file = os.path.join(workspace, "intermediate", "project_aoi", "pyramids", f"aoi_{iso3}.gpkg") + aez_iso.to_file(aez_iso_file, driver='GPKG') + + 
aez_raster = os.path.join(workspace, "intermediate", "project_aoi", "pyramids", f"aez_{iso3}_raster.tif") + rasterize_to_match(input_vector_path = aez_iso_file, + match_raster_path = seal7_lulc, + output_raster_path = aez_raster, + burn_column_name='AEZ', + burn_values=None, + datatype=5, + ndv=None, + all_touched=False) + area = read_raster_as_array(area_ha) + lulc = read_raster_as_array(seal7_lulc) + aez = read_raster_as_array(aez_raster) + for aez_val in pd.unique(regional_change_df.AEZ): + regional_change_df_aez = regional_change_df[regional_change_df['AEZ'] == aez_val] + for lu_desc in pd.unique(regional_change_df.landuse): + lulc_code = SEAL7_MANAGE_LANDUSE_DICT[lu_desc] + lulc_mask = lulc == lulc_code + aez_mask = aez == int(aez_val) + area_lu_aez = area * lulc_mask * aez_mask + regional_change_df.loc[(regional_change_df['landuse'] == lu_desc) & (regional_change_df["AEZ"] == aez_val), + ["total_ha"]] = np.sum(area_lu_aez) + + regional_change_df["change_ha"] = regional_change_df["total_ha"]*regional_change_df["change_percent"]/100 + + return regional_change_df + +def read_raster_as_array(raster_path): + with rio.open(raster_path) as src: + return src.read(1) + + + +def input_seal_format(regional_change_df): + regional_change_df['region_label'] = regional_change_df['GTAP141'] + regional_change_df['AEZ'] + regional_change_df = regional_change_df.pivot(index='region_label', columns='landuse', values='change_ha') + regional_change_df = regional_change_df.reset_index() + regional_change_df = regional_change_df.fillna(0) + return regional_change_df + +regional_change_df = reformat_aez(regional_change_csv) +regional_change_df = get_total_by_aez_lu(regional_change_df, aez_vector_file, area_ha, seal7_lulc) +regional_change_df = input_seal_format(regional_change_df) +regional_change_df.to_csv(os.path.join(workspace, "intermediate", "project_aoi", "pyramids", f"regional_change_input.csv"), index=False) \ No newline at end of file diff --git 
a/scripts/oneoffs/inject_manage_output_simple.py b/scripts/oneoffs/inject_manage_output_simple.py new file mode 100644 index 0000000..2c84609 --- /dev/null +++ b/scripts/oneoffs/inject_manage_output_simple.py @@ -0,0 +1,188 @@ +import os +import geopandas as gpd +import numpy as np +import pandas as pd +import rasterio as rio +from hazelbean import rasterize_to_match +import pygeoprocessing.geoprocessing as geo + + +SEAL7_MANAGE_LANDUSE_DICT = { + 'Shrubland': 5, + 'Pastureland': 3, + 'SavnGrassLnd': 3, + 'cropLand': 2, + 'Builtupland': 1, + 'Forest': 4, + 'unManagedForest': 4, + 'Otherland': 5} + +SEAL7_LANDUSE_DICT = { + 'urban': 1, + 'cropLand': 2, + 'grassland': 3, + 'forest': 4, + 'othernat': 5, + 'water': 6, + 'other': 7 + } + + +def reformat_aez(regional_change_csv): + regional_change_df = pd.read_csv(regional_change_csv) + regional_change_df.columns = ["GTAP141", "AEZ", "landuse", "year", "change_percent", "abs_change_mha"] + for col in ["GTAP141", "AEZ", "landuse"]: + regional_change_df[col] = regional_change_df[col].str.replace("'", '') + regional_change_df['AEZ'] = regional_change_df['AEZ'].str.replace("f-AEZ", '') + return regional_change_df + + + +def get_total_by_aez_lu(regional_change_df, aez_vector_file, area_ha, seal7_lulc): + aez = gpd.read_file(aez_vector_file) + aez_iso = aez[aez['GTAP141'] == iso3] + aez_iso = aez_iso.to_crs("EPSG:4326") + aez_iso_file = os.path.join(workspace, "intermediate", "project_aoi", "pyramids", f"aoi_{iso3}.gpkg") + aez_iso.to_file(aez_iso_file, driver='GPKG') + + aez_raster = os.path.join(workspace, "intermediate", "project_aoi", "pyramids", f"aez_{iso3}_raster.tif") + rasterize_to_match(input_vector_path = aez_iso_file, + match_raster_path = seal7_lulc, + output_raster_path = aez_raster, + burn_column_name='AEZ', + burn_values=None, + datatype=5, + ndv=None, + all_touched=False) + area = read_raster_as_array(area_ha) + lulc = read_raster_as_array(seal7_lulc) + aez = read_raster_as_array(aez_raster) + for aez_val in 
pd.unique(regional_change_df.AEZ): + regional_change_df_aez = regional_change_df[regional_change_df['AEZ'] == aez_val] + for lu_desc in pd.unique(regional_change_df.landuse): + lulc_code = SEAL7_MANAGE_LANDUSE_DICT[lu_desc] + lulc_mask = lulc == lulc_code + aez_mask = aez == int(aez_val) + area_lu_aez = area * lulc_mask * aez_mask + regional_change_df.loc[(regional_change_df['landuse'] == lu_desc) & (regional_change_df["AEZ"] == aez_val), + ["total_ha"]] = np.sum(area_lu_aez) + + regional_change_df["change_ha"] = regional_change_df["total_ha"]*regional_change_df["change_percent"]/100 + + return regional_change_df + +def read_raster_as_array(raster_path): + with rio.open(raster_path) as src: + return src.read(1) + + + +def input_seal_format(regional_change_df): + regional_change_df['region_label'] = regional_change_df['GTAP141'] + regional_change_df['AEZ'] + regional_change_df = regional_change_df.pivot(index='region_label', columns='landuse', values='change_ha') + regional_change_df = regional_change_df.reset_index() + regional_change_df = regional_change_df.fillna(0) + return regional_change_df + +def get_SEALS_lucodes(regional_change_df, SEAL7_MANAGE_LANDUSE_DICT, SEAL7_LANDUSE_DICT): + regional_change_df['seals_code'] = regional_change_df['landuse'].map(SEAL7_MANAGE_LANDUSE_DICT) + SEAL7_LANDUSE_DICT = dict((v,k) for k,v in SEAL7_LANDUSE_DICT.items()) + regional_change_df['seals_desc'] = regional_change_df['seals_code'].map(SEAL7_LANDUSE_DICT) + return regional_change_df + +def collapse_to_country(regional_change_df): + regional_change_df = regional_change_df[['abs_change_mha', 'seals_desc']].groupby('seals_desc').sum()*1000000 + regional_change_df = regional_change_df.reset_index() + return regional_change_df + +def country_input_seals_format(regional_change_df): + regional_change_df = regional_change_df.T + regional_change_df.columns = regional_change_df.iloc[0] + regional_change_df = regional_change_df[1:] + regional_change_df.insert(0, 'region_label', 
['tza']) + return regional_change_df + +def get_lulc_area(lulc_raster, area_ha_raster): + lulc = read_raster_as_array(lulc_raster) + area = read_raster_as_array(area_ha_raster) + lulc_area = {} + for lulc_code in np.unique(lulc): + if lulc_code == 0 or lulc_code == 255: + continue + else: + lulc_mask = lulc == lulc_code + lulc_area[lulc_code] = np.sum(area * lulc_mask) + return lulc_area + +def get_aligned_rasters(base_raster, aoi_vector, source_raster_dict): + base_lulc_info = geo.get_raster_info(base_raster) + + src_paths = [] + dst_paths = [] + for k, v in source_raster_dict.items(): + if os.path.splitext(v)[1] in [".tif", ".bil", ".img"]: + src_paths.append(v) + target_raster_path = v.replace(".tif", "_aligned.tif") + dst_paths.append(target_raster_path) + + geo.align_and_resize_raster_stack( + base_raster_path_list=src_paths, + target_raster_path_list=dst_paths, + resample_method_list=['near' for _ in src_paths], + target_pixel_size=base_lulc_info['pixel_size'], + bounding_box_mode="intersection", + base_vector_path_list=[aoi_vector], + mask_options={"mask_vector_path": aoi_vector}, + raster_align_index=0 # Assume that first raster is the base raster + ) + + +if __name__=="__main__": + iso3 = "IND" + workspace = "C:/Users/salmamun/Files/seals/projects/manage_ind_boundary" + aez_vector_file = "G:/Shared drives/NatCapTEEMs/Projects/WB_MANAGE_Project/GTAPLULCv10adminaez/GTAPLULCv10adminaez.shp" + project_aoi = os.path.join(workspace, "intermediate", "project_aoi_gtap_r251", f"aoi_{iso3}.gpkg") + area_ha = os.path.join(workspace, "intermediate", "project_aoi_gtap_r251", "pyramids", "aoi_ha_per_cell_fine.tif") + seal7_lulc = os.path.join(workspace, "intermediate", "fine_processed_inputs", "lulc", "esa", "seals7", "lulc_esa_seals7_2017.tif") + + regional_change_csv = "G:/Shared drives/NatCapTEEMs/Projects/WB_MANAGE_Project/toInvest_abs.csv" + + + regional_change_df = reformat_aez(regional_change_csv) + regional_change_df = get_SEALS_lucodes(regional_change_df, 
SEAL7_MANAGE_LANDUSE_DICT, SEAL7_LANDUSE_DICT) + regional_change_df = collapse_to_country(regional_change_df) + regional_change_df = country_input_seals_format(regional_change_df) + + + + regional_change_df = get_total_by_aez_lu(regional_change_df, aez_vector_file, area_ha, seal7_lulc) + regional_change_df = input_seal_format(regional_change_df) + regional_change_df.to_csv(os.path.join(workspace, "intermediate", "project_aoi_gtap_r251", "pyramids", f"regional_change_input_country.csv"), index=False) + + # Check if things work + source_raster_dict = { + "bau": os.path.join(workspace, "intermediate", "stitched_lulc_simplified_scenarios", "lulc_esa_seals7_ssp2_rcp45_luh2-message_bau_2030.tif"), + "bau_shift": os.path.join(workspace, "intermediate", "stitched_lulc_simplified_scenarios", "lulc_esa_seals7_ssp2_rcp45_luh2-message_bau_shift_2030.tif"), + "baseline": seal7_lulc, + "area_ha_raster": os.path.join(workspace, "intermediate", "project_aoi_gtap_r251", "pyramids", "aoi_ha_per_cell_fine.tif") + } + # Do the following once + get_aligned_rasters(base_raster = seal7_lulc, aoi_vector=project_aoi, source_raster_dict=source_raster_dict) + base_lulc_raster = os.path.join(workspace, "intermediate", "fine_processed_inputs", "lulc", "esa", "seals7", "lulc_esa_seals7_2017_aligned.tif") + area_ha_raster = os.path.join(workspace, "intermediate", "project_aoi_gtap_r251", "pyramids", "aoi_ha_per_cell_fine_aligned.tif") + base_lulc_area = get_lulc_area(base_lulc_raster, area_ha_raster) + + bau_lulc_raster = os.path.join(workspace, "intermediate", "stitched_lulc_simplified_scenarios", "lulc_esa_seals7_ssp2_rcp45_luh2-message_bau_2030_aligned.tif") + bau_lulc_area = get_lulc_area(bau_lulc_raster, area_ha_raster) + + bau_shift_lulc_raster = os.path.join(workspace, "intermediate", "stitched_lulc_simplified_scenarios", "lulc_esa_seals7_ssp2_rcp45_luh2-message_bau_shift_2030_aligned.tif") + bau_shift_lulc_area = get_lulc_area(bau_shift_lulc_raster, area_ha_raster) + + base = 
pd.DataFrame.from_dict(base_lulc_area, orient='index', columns=['base_ha']) + bau = pd.DataFrame.from_dict(bau_lulc_area, orient='index', columns=['bau_ha']) + bau_shift = pd.DataFrame.from_dict(bau_shift_lulc_area, orient='index', columns=['bau_shift_ha']) + + area_data = pd.concat([base, bau, bau_shift], axis=1) + area_data = area_data.rename_axis('code').reset_index() + area_data['landuse'] = area_data['code'].map(dict((v,k) for k,v in SEAL7_LANDUSE_DICT.items())) + area_data.to_csv(os.path.join(workspace, "intermediate", "project_aoi_gtap_r251", "pyramids", "lulc_area_2030.csv"), index=False) \ No newline at end of file diff --git a/scripts/oneoffs/run_es_models.py b/scripts/oneoffs/run_es_models.py new file mode 100644 index 0000000..b436aac --- /dev/null +++ b/scripts/oneoffs/run_es_models.py @@ -0,0 +1,185 @@ +import os +import shutil +import yaml +import geopandas as gpd +import pygeoprocessing.geoprocessing as geo +import rasterio as rio +from rasterio.warp import calculate_default_transform, reproject, Resampling + +import natcap.invest.sdr.sdr +import time + + +def slice_inputs(base_raster, source_raster_dict, aoi_vector, target_folder, huc4= 'None'): + """ + Assumes that `source_raster_dict` will be keyed as new_name: orig_path + """ + base_lulc_info = geo.get_raster_info(base_raster) + + src_paths = [] + dst_paths = [] + dst_dict = {} + for k, v in source_raster_dict.items(): + if os.path.splitext(v)[1] in [".tif", ".bil", ".img"]: + src_paths.append(v) + dst_paths.append(os.path.join(target_folder, f'{k}.tif')) + dst_dict[k] = os.path.join(target_folder, f'{k}.tif') + + try: + raster_align_index = int([i for i, x in enumerate(src_paths) if x == source_raster_dict[f'dem_7755']][0]) + except: + raster_align_index = None + + geo.align_and_resize_raster_stack( + base_raster_path_list=src_paths, + target_raster_path_list=dst_paths, + resample_method_list=['near' for _ in src_paths], + target_pixel_size=base_lulc_info['pixel_size'], + 
bounding_box_mode="intersection", + base_vector_path_list=[aoi_vector], + raster_align_index=raster_align_index, + vector_mask_options={'mask_vector_path': aoi_vector}, + ) + return dst_dict + +def reproject_7755(raster_path, target_raster_path, dst_crs='EPSG:7755'): + ''' + This function reprojects the raster to EPSG:7755 or any given crs. + This helper function is used in data_preparation function. + ''' + with rio.open(raster_path) as src: + transform, width, height = calculate_default_transform( + src.crs, dst_crs, src.width, src.height, *src.bounds) + kwargs = src.meta.copy() + kwargs.update({ + 'crs': dst_crs, + 'transform': transform, + 'width': width, + 'height': height, + "compress": "LZW" + }) + if src.crs != dst_crs: + with rio.open(target_raster_path, 'w', **kwargs) as dst: + for i in range(1, src.count + 1): + reproject( + source=rio.band(src, i), + destination=rio.band(dst, i), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=dst_crs, + resampling=Resampling.nearest) + else: + shutil.copy(raster_path, target_raster_path) + +def fix_watersheds(watersheds_path, aoi_path, unique_id_field): + ''' + This function fixes the watersheds shapefile by: + 1) clipping the watershed file to aoi and then + 2) exploding multipart polygons and keeping the largest area polygon for each unique id. + TODO: The fix might delete relative large polygons if they are not the largest in the multipart polygon. Need to discuss this. + Do we implement a filter based on area? 
+ ''' + watersheds_gdf = gpd.read_file(watersheds_path) + if watersheds_gdf.crs==None: + watersheds_gdf.crs = "EPSG:4326" + if watersheds_gdf.crs != "EPSG:7755": + watersheds_gdf = watersheds_gdf.to_crs(7755) + + aoi_gdf = gpd.read_file(aoi_path) + clipped_gdf = gpd.clip(watersheds_gdf, aoi_gdf, keep_geom_type=False) + clipped_wtershed = watersheds_path.replace(".shp", "_IND_clipped.shp") + clipped_gdf.to_file(clipped_wtershed, driver='ESRI Shapefile') + + watersheds_gdf = clipped_gdf.explode(index_parts=True) + watersheds_gdf = watersheds_gdf.loc[watersheds_gdf.geometry.geometry.type=='Polygon'] # Only keep Polygons + watersheds_gdf["area"] = watersheds_gdf['geometry'].area + multipart_wtershed = watersheds_path.replace(".shp", "_IND_multipart.shp") + watersheds_gdf.to_file(multipart_wtershed, driver='ESRI Shapefile') + watersheds_gdf = watersheds_gdf.merge(watersheds_gdf.groupby([unique_id_field])['area'].max().reset_index(), on=[unique_id_field, 'area'], how='right') + fixed_watersheds = watersheds_path.replace(".shp", "_IND_fixed.shp") + watersheds_gdf.to_file(fixed_watersheds, driver='ESRI Shapefile') + +def data_preparation(seal_lulc_4326): + ''' + This function prepares the data for the ecosystem services models. + You need to run this function onece to prepare the data. 
+ I did not give any argrument to this function because I want to run this function only once.''' + + source_raster_dict = {} + source_raster_dict[f"dem_con"]= "D:/MANAGE/CommonInputs/DEMHydroSHED3s/as_con_3s/as_con_3s.tif" + source_raster_dict[f"lulc"]= seal_lulc_4326 + source_raster_dict[f"precipitation_annual"]= "D:/NDR/CommonInputs/MonthlyPrecipitation2021/WorldClim/wc2.1_30s_prec_annual.tif" + source_raster_dict[f"k_factor"]= "D:/NDR/CommonInputs/ISRIC/global_soil_erodibility.tif" + source_raster_dict[f"rainfall_erosivity"]= "D:/NDR/CommonInputs/GlobalRFactor/GlobalR/GlobalR_NoPol.tif" + + raster_folder = "D:/MANAGE/CommonInputs/RasterInputs" + aoi_buffer_4326 = "D:/MANAGE/CommonInputs/aoi/aoi_IND_buffer.shp" + + for k, v in source_raster_dict.items(): + single_dict = {k: v} + base_raster = v + slice_inputs(base_raster, single_dict, aoi_buffer_4326, raster_folder) + unprojected_raster = os.path.join(raster_folder, f'{k}.tif') + projected_raster = os.path.join(raster_folder, f'{k}_7755.tif') + reproject_7755(unprojected_raster, projected_raster, dst_crs='EPSG:7755') + + # Get other resolution of dem + dem_path = os.path.join(raster_folder, "dem_con_7755.tif") + dem_resolution(dem_path, resolution=300) + dem_resolution(dem_path, resolution=1000) + + # Fix watersheds + watersheds_path = "D:/MANAGE/CommonInputs/watersheds/hybas_as_lev06_v1c.shp" + aoi_path = "D:/MANAGE/CommonInputs/aoi/aoi_IND.shp" + fix_watersheds(watersheds_path, aoi_path=aoi_path, unique_id_field="SORT") + +def dem_resolution(dem_path, resolution=300): + ''' + This function resamples the dem to the given resolution. 
+ ''' + with rio.open(dem_path) as dataset: + scale_factor_x = dataset.res[0]/resolution + scale_factor_y = dataset.res[1]/resolution + + profile = dataset.profile.copy() + # resample data to target shape + data = dataset.read( + out_shape=( + dataset.count, + int(dataset.height * scale_factor_y), + int(dataset.width * scale_factor_x) + ), + resampling=Resampling.cubic_spline + ) + + # scale image transform + transform = dataset.transform * dataset.transform.scale( + (1 / scale_factor_x), + (1 / scale_factor_y) + ) + profile.update({"height": data.shape[-2], + "width": data.shape[-1], + "transform": transform}) + + target_dem_path = dem_path.replace('.tif', f"_{resolution}.tif") + with rio.open(target_dem_path, "w", **profile) as dataset: + dataset.write(data) + + +def sdr_run(config): + ''' + This function runs the SDR model with given args. + ''' + with open(config) as yaml_data_file: + sdr_args = yaml.load(yaml_data_file, Loader=yaml.FullLoader) + natcap.invest.sdr.sdr.execute(sdr_args) + print(f"==============================================SDR finished======================================") + +if __name__ == "__main__": + star_time = time.time() + seal_lulc_4326 = "C:/Users/salmamun/Files/seals/projects/manage_ind_boundary/intermediate/fine_processed_inputs/lulc/esa/seals7/lulc_esa_seals7_2017.tif" + # data_preparation(seal_lulc_4326) # Run this function to prepare the data. Run it only once. 
+ config = "C:/Users/salmamun/Files/seals/seals_dev/scripts/ind_sdr_sm.yaml" + sdr_run(config) + print(f"Total time taken: {time.time() - star_time} seconds") \ No newline at end of file diff --git a/scripts/oneoffs/stitch_inputs.py b/scripts/oneoffs/stitch_inputs.py new file mode 100644 index 0000000..1504367 --- /dev/null +++ b/scripts/oneoffs/stitch_inputs.py @@ -0,0 +1,66 @@ +import os +import pandas as pd +import geopandas as gpd +from osgeo import gdal +gdal.UseExceptions() + + +def get_files(tile_dir: str): + ''' + Get all the files in the directory that end with .tif + + Parameters: + tile_dir (str): The directory where the files are located + Returns: a list of file paths + ''' + dem_fps = [] + for files in os.listdir(tile_dir): + if files.endswith(".tif") or files.endswith(".shp"): + dem_fps.append(os.path.join(tile_dir, files)) + return dem_fps + + +def gdal_parallel_mosaic(dem_fps:list, out_fp:str, dest_crs:str="EPSG:4326", n_cores:str="ALL_CPUS"): + ''' + Mosaic the DEM files. This method is faster as it can use mutiprocessing and directly saving after each tile is read. + The rasterio version fails if the number of files is too large. + Parameters: + dem_fps (list): A list of file paths. + out_fp (str): The output file path for the mosaic raster. + dest_crs (str): The destination crs. Default is EPSG:4326 + n_cores (str): The number of cores to use. Default is ALL_CPUS. + + Returns: None. It creates a mosaic raster.''' + gdal.Warp(destNameOrDestDS=out_fp, + srcDSOrSrcDSTab = dem_fps, + format="GTiff", + resampleAlg="bilinear", + xRes=0.002777777777777777884, + yRes=0.002777777777777777884, + options=f"-overwrite -multi -wm 80% -t_srs {dest_crs} -tr 0.002777777777777777884 0.002777777777777777884 -co TILED=YES -co BIGTIFF=YES -co COMPRESS=LZW -co NUM_THREADS={n_cores}") + +def join_vector(dem_fps:list, out_fp:str): + ''' + Join the vector files and save as a single geopackage file. + Parameters: + dem_fps (list): A list of vector file paths. 
+ out_fp (str): The output file path for the joined vector. + Returns: None. It creates a joined vector file. + TODO: Add a check to see if a file is empty and skip it from concatenations. + ''' + src_files_to_gpd = gpd.GeoDataFrame() + for fp in dem_fps: + src = gpd.read_file(fp) + src_files_to_gpd = pd.concat([src_files_to_gpd, src], ignore_index=True) + src_files_to_gpd.to_file(out_fp, driver="GPKG") + + +if __name__ == "__main__": + dem_dir = "D:/MANAGE/CommonInputs/DEMHydroSHED3s/continetal_inputs" + watershed_dir = "D:/MANAGE/CommonInputs/GlobalWatersheds/continetal_inputs" + dem_output = os.path.join(os.path.dirname(dem_dir), "global_dem_10as.tif") + watershed_output = os.path.join(os.path.dirname(watershed_dir), "watersheds_level_7.gpkg") + dem_fps = get_files(dem_dir) + watershed_fps = get_files(watershed_dir) + # gdal_parallel_mosaic(dem_fps, dem_output) + join_vector(watershed_fps, watershed_output) \ No newline at end of file From dd8715db061d0d35b42a1fd6f55d159956787e17 Mon Sep 17 00:00:00 2001 From: Saleh Mamun Date: Fri, 15 Nov 2024 17:00:31 -0600 Subject: [PATCH 2/7] Added calculation of pollination sufficiency and pollination economic shock These functions estimate pollination sufficiency from an LULC map and then compare the scenario LULC with the baseline LULC to calculate shocks depending on the crop-adjusted pollination dependence. These functions are not modified much other than making them homogeneous so that they can take any scenario parameter provided by seals. 
Previously these functions were static only for 2017 baseline and 2022 scenario --- scripts/pollination_biophysical_model.py | 399 +++++++++++++++++++++++ scripts/pollination_shocks.py | 249 ++++++++++++++ 2 files changed, 648 insertions(+) create mode 100644 scripts/pollination_biophysical_model.py create mode 100644 scripts/pollination_shocks.py diff --git a/scripts/pollination_biophysical_model.py b/scripts/pollination_biophysical_model.py new file mode 100644 index 0000000..04fc044 --- /dev/null +++ b/scripts/pollination_biophysical_model.py @@ -0,0 +1,399 @@ +'''' +This script is part of global_invest>ecosystem_services_function.py. I deleted the carbon part to make it simple. +I also cleaned unnecessary imports. +''' + +import os, sys +import hazelbean as hb +import logging +import multiprocessing +import os +import sys +import time +from osgeo import gdal +from osgeo import osr +import numpy +import pygeoprocessing +import scipy.ndimage.morphology +import taskgraph + +LANDCOVER_DATA_MAP = { + 'data_suffix': 'landcover raster.tif', +} +_MULT_NODATA = -1 +_MASK_NODATA = 2 +GLOBIO_AG_CODES = [2, (10, 40), (230, 232)] +GLOBIO_NATURAL_CODES = [6, (50, 180)] +BMP_LULC_CODES = [300] + +NODATA = -9999 +N_WORKERS = max(1, multiprocessing.cpu_count()) + +def pollination_sufficiency(lulc_input_path, pollination_sufficiency_output_path): + + + """ + Pollination sufficiency analysis. This is based off the IPBES-Pollination + project so that we can run on any new LULC scenarios with ESA classification. 
+ Used to be called dasgupta_agriculture.py but then we did it for more than just Dasgupta + """ + + global LOGGER, WORKING_DIR, ECOSHARD_DIR, CHURN_DIR, _MULT_NODATA, _MASK_NODATA, GLOBIO_AG_CODES, GLOBIO_NATURAL_CODES, BMP_LULC_CODES, NODATA, N_WORKERS, LANDCOVER_DATA_MAP + + WORKING_DIR = os.path.split(pollination_sufficiency_output_path)[0] + ECOSHARD_DIR = os.path.join(WORKING_DIR, 'ecoshard_dir') + CHURN_DIR = os.path.join(WORKING_DIR, 'churn') + + # format of the key pairs is [data suffix]: [landcover raster] + # these must be ESA landcover map type + LANDCOVER_DATA_MAP = { + 'data_suffix': 'landcover raster.tif', + } + + # set a limit for the cache + gdal.SetCacheMax(2**28) + + logging.basicConfig( + level=logging.DEBUG, + format=( + '%(asctime)s (%(relativeCreated)d) %(levelname)s %(name)s' + ' [%(pathname)s.%(funcName)s:%(lineno)d] %(message)s'), + stream=sys.stdout) + LOGGER = logging.getLogger('pollination') + logging.getLogger('taskgraph').setLevel(logging.INFO) + + _MULT_NODATA = -1 + _MASK_NODATA = 2 + # the following are the globio landcover codes. A tuple (x, y) indicates the + # inclusive range from x to y. Pollinator habitat was defined as any natural + # land covers, as defined (GLOBIO land-cover classes 6, secondary vegetation, + # and 50-180, various types of primary vegetation). To test sensitivity to + # this definition we included "semi-natural" habitats (GLOBIO land-cover + # classes 3, 4, and 5; pasture, rangeland and forestry, respectively) in + # addition to "natural", and repeated all analyses with semi-natural plus + # natural habitats, but this did not substantially alter the results so we do + # not include it in our final analysis or code base. 
+ + GLOBIO_AG_CODES = [2, (10, 40), (230, 232)] + GLOBIO_NATURAL_CODES = [3, 4, 5, (50, 180)] + BMP_LULC_CODES = [300] + + WORKING_DIR = os.path.split(pollination_sufficiency_output_path)[0] + ECOSHARD_DIR = os.path.join(WORKING_DIR, 'ecoshard_dir') + CHURN_DIR = os.path.join(WORKING_DIR, 'churn') + + NODATA = -9999 + N_WORKERS = max(1, multiprocessing.cpu_count()) + + landcover_path = lulc_input_path + + + print('WORKING_DIR', WORKING_DIR) + print('ECOSHARD_DIR', ECOSHARD_DIR) + print('CHURN_DIR', CHURN_DIR) + + landcover_raster_list = [] + landcover_raster_list.append(landcover_path) + + task_graph = taskgraph.TaskGraph( + WORKING_DIR, N_WORKERS, reporting_interval=5.0) + for dir_path in [ + ECOSHARD_DIR, CHURN_DIR]: + try: + os.makedirs(dir_path) + except OSError: + pass + time.sleep(1.0) + + for landcover_path in landcover_raster_list: + LOGGER.info("process landcover map: %s", landcover_path) + calculate_for_landcover(task_graph, landcover_path, pollination_sufficiency_output_path) + + task_graph.join() + task_graph.close() + + remove_intermediates = True + if remove_intermediates: + hb.remove_dirs(CHURN_DIR, safety_check='delete') + hb.remove_dirs(ECOSHARD_DIR, safety_check='delete') + hb.remove_path(os.path.join(os.path.split(ECOSHARD_DIR)[0], 'taskgraph_data.db')) + + + +def calculate_for_landcover(task_graph, landcover_path, output_path): + """Calculate values for a given landcover. + Parameters: + task_graph (taskgraph.TaskGraph): taskgraph object used to schedule + work. + landcover_path (str): path to a landcover map with globio style + landcover codes. + + Returns: + None. 
+ """ + landcover_key = os.path.splitext(os.path.basename(landcover_path))[0] + # landcover_key = landcover_key.replace('lulc_' + p.lulc_src_label + '_' + p.lulc_simplification_label, '_') + output_dir = os.path.join(WORKING_DIR) + # output_dir = os.path.join(WORKING_DIR, landcover_key) + for dir_path in [output_dir, ECOSHARD_DIR, CHURN_DIR]: + try: + os.makedirs(dir_path) + except OSError: + pass + + + #NEEDED + # The proportional area of natural within 2 km was calculated for every + # pixel of agricultural land (GLOBIO land-cover classes 2, 230, 231, and + # 232) at 10 arc seconds (~300 m) resolution. This 2 km scale represents + # the distance most commonly found to be predictive of pollination + # services (Kennedy et al. 2013). + kernel_raster_path = os.path.join(CHURN_DIR, 'radial_kernel.tif') + kernel_task = task_graph.add_task( + func=create_radial_convolution_mask, + args=(0.00277778, 2000., kernel_raster_path), + target_path_list=[kernel_raster_path], + task_name='make convolution kernel') + + # This loop is so we don't duplicate code for each mask type with the + # only difference being the lulc codes and prefix + mask_task_path_map = {} + for mask_prefix, globio_codes in [ + ('ag', GLOBIO_AG_CODES), ('hab', GLOBIO_NATURAL_CODES), + ('bmp', BMP_LULC_CODES)]: + mask_key = f'{landcover_key}_{mask_prefix}_mask' + mask_target_path = os.path.join( + CHURN_DIR, f'{mask_prefix}_mask', + f'{mask_key}.tif') + mask_task = task_graph.add_task( + func=mask_raster, + args=(landcover_path, globio_codes, mask_target_path), + target_path_list=[mask_target_path], + task_name=f'mask {mask_key}',) + + mask_task_path_map[mask_prefix] = (mask_task, mask_target_path) + + + pollhab_2km_prop_path = os.path.join( + CHURN_DIR, 'pollhab_2km_prop', + f'pollhab_2km_prop_{landcover_key}.tif') + pollhab_2km_prop_task = task_graph.add_task( + func=pygeoprocessing.convolve_2d, + args=[ + (mask_task_path_map['hab'][1], 1), (kernel_raster_path, 1), + pollhab_2km_prop_path], + kwargs={ + 
'working_dir': CHURN_DIR, + 'ignore_nodata_and_edges': True}, + dependent_task_list=[mask_task_path_map['hab'][0], kernel_task], + target_path_list=[pollhab_2km_prop_path], + task_name=( + 'calculate proportional' + f' {os.path.basename(pollhab_2km_prop_path)}')) + + # calculate pollhab_2km_prop_on_ag_10s by multiplying pollhab_2km_prop + # by the ag mask + + pollhab_2km_prop_on_ag_path = output_path + pollhab_2km_prop_on_ag_task = task_graph.add_task( + func=mult_rasters, + args=( + mask_task_path_map['ag'][1], pollhab_2km_prop_path, + pollhab_2km_prop_on_ag_path), + target_path_list=[pollhab_2km_prop_on_ag_path], + dependent_task_list=[ + pollhab_2km_prop_task, mask_task_path_map['ag'][0]], + task_name=( + f'''pollhab 2km prop on ag { + os.path.basename(pollhab_2km_prop_on_ag_path)}''')) + + # 1.1.4. Sufficiency threshold A threshold of 0.3 was set to + # evaluate whether there was sufficient pollinator habitat in the 2 + # km around farmland to provide pollination services, based on + # Kremen et al.'s (2005) estimate of the area requirements for + # achieving full pollination. This produced a map of wild + # pollination sufficiency where every agricultural pixel was + # designated in a binary fashion: 0 if proportional area of habitat + # was less than 0.3; 1 if greater than 0.3. Maps of pollination + # sufficiency can be found at (permanent link to output), outputs + # "poll_suff_..." below. 
+
+    do_threshold = False
+    if do_threshold:
+        threshold_val = 0.3
+        pollinator_suff_hab_path = os.path.join(
+            CHURN_DIR, 'poll_suff_hab_ag_coverage_rasters',
+            f'poll_suff_ag_coverage_prop_10s_{landcover_key}.tif')
+        poll_suff_task = task_graph.add_task(
+            func=threshold_select_raster,
+            args=(
+                pollhab_2km_prop_path,
+                mask_task_path_map['ag'][1], threshold_val,
+                pollinator_suff_hab_path),
+            target_path_list=[pollinator_suff_hab_path],
+            dependent_task_list=[
+                pollhab_2km_prop_task, mask_task_path_map['ag'][0]],
+            task_name=f"""poll_suff_ag_coverage_prop {
+                os.path.basename(pollinator_suff_hab_path)}""")
+
+def create_radial_convolution_mask(
+        pixel_size_degree, radius_meters, kernel_filepath):
+    """Create a radial mask to sample pixels in convolution filter.
+    Parameters:
+        pixel_size_degree (float): size of pixel in degrees.
+        radius_meters (float): desired size of radial mask in meters.
+    Returns:
+        None. The normalized kernel is written to ``kernel_filepath`` as a
+        raster that can be used in a convolution to aggregate another
+        raster while accounting for partial circle coverage at the edges.
+ """ + degree_len_0 = 110574 # length at 0 degrees + degree_len_60 = 111412 # length at 60 degrees + pixel_size_m = pixel_size_degree * (degree_len_0 + degree_len_60) / 2.0 + pixel_radius = numpy.ceil(radius_meters / pixel_size_m) + n_pixels = (int(pixel_radius) * 2 + 1) + sample_pixels = 200 + mask = numpy.ones((sample_pixels * n_pixels, sample_pixels * n_pixels)) + mask[mask.shape[0]//2, mask.shape[0]//2] = 0 + distance_transform = scipy.ndimage.morphology.distance_transform_edt(mask) + mask = None + stratified_distance = distance_transform * pixel_size_m / sample_pixels + distance_transform = None + in_circle = numpy.where(stratified_distance <= 2000.0, 1.0, 0.0) + stratified_distance = None + reshaped = in_circle.reshape( + in_circle.shape[0] // sample_pixels, sample_pixels, + in_circle.shape[1] // sample_pixels, sample_pixels) + kernel_array = numpy.sum(reshaped, axis=(1, 3)) / sample_pixels**2 + normalized_kernel_array = kernel_array / numpy.sum(kernel_array) + reshaped = None + + driver = gdal.GetDriverByName('GTiff') + kernel_raster = driver.Create( + kernel_filepath.encode('utf-8'), n_pixels, n_pixels, 1, + gdal.GDT_Float32, options=[ + 'BIGTIFF=IF_SAFER', 'TILED=YES', 'BLOCKXSIZE=256', + 'BLOCKYSIZE=256']) + + # Make some kind of geotransform, it doesn't matter what but + # will make GIS libraries behave better if it's all defined + kernel_raster.SetGeoTransform([-180, 1, 0, 90, 0, -1]) + srs = osr.SpatialReference() + srs.ImportFromEPSG(4326) + kernel_raster.SetProjection(srs.ExportToWkt()) + kernel_band = kernel_raster.GetRasterBand(1) + kernel_band.SetNoDataValue(NODATA) + kernel_band.WriteArray(normalized_kernel_array) + +def mask_raster(base_path, codes, target_path): + LOGGER = logging.getLogger('pollination') + logging.getLogger('taskgraph').setLevel(logging.INFO) + """Mask `base_path` to 1 where values are in codes. 0 otherwise. + Parameters: + base_path (string): path to single band integer raster. 
+ codes (list): list of integer or tuple integer pairs. Membership in + `codes` or within the inclusive range of a tuple in `codes` + is sufficient to mask the corresponding raster integer value + in `base_path` to 1 for `target_path`. + target_path (string): path to desired mask raster. Any corresponding + pixels in `base_path` that also match a value or range in + `codes` will be masked to 1 in `target_path`. All other values + are 0. + Returns: + None. + """ + code_list = numpy.array([ + item for sublist in [ + range(x[0], x[1]+1) if isinstance(x, tuple) else [x] + for x in codes] for item in sublist]) + LOGGER.debug(f'expanded code array {code_list}') + + base_nodata = pygeoprocessing.get_raster_info(base_path)['nodata'][0] + + def mask_codes_op(base_array, codes_array): + """Return a bool raster if value in base_array is in codes_array.""" + result = numpy.empty(base_array.shape, dtype=numpy.int8) + result[:] = _MASK_NODATA + valid_mask = base_array != base_nodata + result[valid_mask] = numpy.isin( + base_array[valid_mask], codes_array) + return result + + pygeoprocessing.raster_calculator( + [(base_path, 1), (code_list, 'raw')], mask_codes_op, target_path, + gdal.GDT_Byte, 2) + +def mult_rasters(raster_a_path, raster_b_path, target_path): + """Multiply a by b and skip nodata.""" + raster_info_a = pygeoprocessing.get_raster_info(raster_a_path) + raster_info_b = pygeoprocessing.get_raster_info(raster_b_path) + + nodata_a = raster_info_a['nodata'][0] + nodata_b = raster_info_b['nodata'][0] + + if raster_info_a['raster_size'] != raster_info_b['raster_size']: + aligned_raster_a_path = ( + target_path + os.path.basename(raster_a_path) + '_aligned.tif') + aligned_raster_b_path = ( + target_path + os.path.basename(raster_b_path) + '_aligned.tif') + pygeoprocessing.align_and_resize_raster_stack( + [raster_a_path, raster_b_path], + [aligned_raster_a_path, aligned_raster_b_path], + ['near'] * 2, raster_info_a['pixel_size'], 'intersection') + raster_a_path = 
aligned_raster_a_path + raster_b_path = aligned_raster_b_path + + def _mult_raster_op(array_a, array_b, nodata_a, nodata_b, target_nodata): + """Multiply a by b and skip nodata.""" + result = numpy.empty(array_a.shape, dtype=numpy.float32) + result[:] = target_nodata + valid_mask = (array_a != nodata_a) & (array_b != nodata_b) + result[valid_mask] = array_a[valid_mask] * array_b[valid_mask] + return result + + pygeoprocessing.raster_calculator( + [(raster_a_path, 1), (raster_b_path, 1), (nodata_a, 'raw'), + (nodata_b, 'raw'), (_MULT_NODATA, 'raw')], _mult_raster_op, + target_path, gdal.GDT_Float32, _MULT_NODATA) + +def threshold_select_raster( + base_raster_path, select_raster_path, threshold_val, target_path): + """Select `select` if `base` >= `threshold_val`. + Parameters: + base_raster_path (string): path to single band raster that will be + used to determine the threshold mask to select from + `select_raster_path`. + select_raster_path (string): path to single band raster to pass + through to target if aligned `base` pixel is >= `threshold_val` + 0 otherwise, or nodata if base == nodata. Must be the same + shape as `base_raster_path`. + threshold_val (numeric): value to use as threshold cutoff + target_path (string): path to desired output raster, raster is a + byte type with same dimensions and projection as + `base_raster_path`. A pixel in this raster will be `select` if + the corresponding pixel in `base_raster_path` is >= + `threshold_val`, 0 otherwise or nodata if `base` == nodata. + Returns: + None. + """ + base_nodata = pygeoprocessing.get_raster_info( + base_raster_path)['nodata'][0] + target_nodata = -9999. 
+ + def threshold_select_op( + base_array, select_array, threshold_val, base_nodata, + target_nodata): + result = numpy.empty(select_array.shape, dtype=numpy.float32) + result[:] = target_nodata + valid_mask = (base_array != base_nodata) & ( + select_array >= 0) & (select_array <= 1) + result[valid_mask] = select_array[valid_mask] * numpy.interp( + base_array[valid_mask], [0, threshold_val], [0.0, 1.0], 0, 1) + return result + + pygeoprocessing.raster_calculator( + [(base_raster_path, 1), (select_raster_path, 1), + (threshold_val, 'raw'), (base_nodata, 'raw'), + (target_nodata, 'raw')], threshold_select_op, + target_path, gdal.GDT_Float32, target_nodata) + diff --git a/scripts/pollination_shocks.py b/scripts/pollination_shocks.py new file mode 100644 index 0000000..c31be61 --- /dev/null +++ b/scripts/pollination_shocks.py @@ -0,0 +1,249 @@ +import os +import hazelbean as hb +import numpy as np +import pandas as pd +import logging +import pygeoprocessing +import csv + +def resample_raster(src, dest, pixel_size, projection_wkt, bounding_box): + pygeoprocessing.align_and_resize_raster_stack( + [src], [dest], ['near'], pixel_size, + target_projection_wkt=projection_wkt, + bounding_box_mode=bounding_box + ) + +def calculate_pollinator_adjusted_value(lulc, poll_suff, crop_value_max_lost, crop_value_baseline, year, sufficient_pollination_threshold, logger): + logger.info(f'Calculating crop value adjusted for pollination sufficiency in {year}') + return np.where( + (crop_value_max_lost > 0) & (poll_suff < sufficient_pollination_threshold) & (lulc == 2), + crop_value_baseline - crop_value_max_lost * (1 - (1 / sufficient_pollination_threshold) * poll_suff), + np.where( + (crop_value_max_lost > 0) & (poll_suff >= sufficient_pollination_threshold) & (lulc == 2), + crop_value_baseline, + -9999. 
+ ) + ) + +def calculate_crop_value_and_shock(lulc_baseline_path, lulc_scenario_path, poll_suff_baseline_path, poll_suff_scenario_path, + crop_data_dir, pollination_dependence_spreadsheet_input_path, output_dir, shock_value_path, + scenario_label, year, base_year): + # Setup logging + logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') + logger = logging.getLogger() + + # Create output paths with unique filenames for each scenario + crop_value_difference_path = os.path.join(output_dir, f'crop_value_difference_from_baseline_to_{scenario_label}_{year}.tif') + crop_value_pollinator_adjusted_output_path = os.path.join(output_dir, f'crop_value_pollinator_adjusted_{scenario_label}_{year}.tif') + + # Threshold for sufficient pollination + sufficient_pollination_threshold = 0.3 + + # Step 1: Load crop dependence data + df_dependence = pd.read_excel(pollination_dependence_spreadsheet_input_path, sheet_name='Crop nutrient content') + crop_names = list(df_dependence['Crop map file name'])[:-3] # Removing the last 3 crops that don't have matching production data + pollination_dependence = list(df_dependence['poll.dep']) + + # Step 2: Initialize arrays for calculations + ref_raster = os.path.join(crop_data_dir, 'apple', 'apple_Production.tif') + ha_shape = hb.get_shape_from_dataset_path(ref_raster) + crop_value_baseline = np.zeros(ha_shape) + crop_value_no_pollination = np.zeros(ha_shape) + + # Step 3: Calculate baseline crop value and no-pollination value + for c, crop_name in enumerate(crop_names): + logger.info(f'Calculating crop value for {crop_name} with pollination dependence {pollination_dependence[c]}') + + # Load crop production data + crop_yield_path = os.path.join(crop_data_dir, f'{crop_name}', f'{crop_name}_Production.tif') + crop_yield = hb.as_array(crop_yield_path) + crop_yield = np.where(crop_yield > 0, crop_yield, 0.0) + + # Calculate value (only based on production) + crop_value_baseline += crop_yield + crop_value_no_pollination += 
crop_yield * (1 - float(pollination_dependence[c])) + + # Step 4: Calculate maximum loss due to pollination + crop_value_max_lost = crop_value_baseline - crop_value_no_pollination + + crop_value_baseline_path = os.path.join(output_dir, f'crop_value_baseline_{scenario_label}_{base_year}.tif') + crop_value_max_lost_path = os.path.join(output_dir, f'crop_value_max_lost_{scenario_label}_{year}.tif') + + # Step 5: Save the baseline crop values + hb.save_array_as_geotiff(crop_value_baseline, crop_value_baseline_path, ref_raster, ndv=-9999, data_type=6) + hb.save_array_as_geotiff(crop_value_max_lost, crop_value_max_lost_path, ref_raster, ndv=-9999, data_type=6) + + # Step 6: Resample Rasters + raster_info = pygeoprocessing.get_raster_info(lulc_scenario_path) + target_pixel_size = raster_info['pixel_size'] + target_projection_wkt = raster_info['projection_wkt'] + target_bb = raster_info['bounding_box'] + + resampled_crop_value_baseline_path = os.path.join(output_dir, f'resampled_crop_value_baseline_{scenario_label}_{base_year}.tif') + resampled_crop_value_max_lost_path = os.path.join(output_dir, f'resampled_crop_value_max_lost_{scenario_label}_{year}.tif') + resampled_poll_suff_baseline_path = os.path.join(output_dir, f'resampled_poll_suff_{scenario_label}_{base_year}.tif') + resampled_poll_suff_scenario_path = os.path.join(output_dir, f'resampled_poll_suff_{scenario_label}_{year}.tif') + resampled_lulc_baseline_path = os.path.join(output_dir, f'resampled_lulc_{scenario_label}_{base_year}.tif') + + resample_raster(crop_value_baseline_path, resampled_crop_value_baseline_path, target_pixel_size, target_projection_wkt, target_bb) + resample_raster(crop_value_max_lost_path, resampled_crop_value_max_lost_path, target_pixel_size, target_projection_wkt, target_bb) + resample_raster(poll_suff_baseline_path, resampled_poll_suff_baseline_path, target_pixel_size, target_projection_wkt, target_bb) + resample_raster(poll_suff_scenario_path, resampled_poll_suff_scenario_path, 
target_pixel_size, target_projection_wkt, target_bb) + resample_raster(lulc_baseline_path, resampled_lulc_baseline_path, target_pixel_size, target_projection_wkt, target_bb) + + # Step 7: Calculate Crop Value Adjusted for Pollination Sufficiency in 2017 and 2022 + lulc_baseline = hb.as_array(resampled_lulc_baseline_path) + poll_suff_baseline = hb.as_array(resampled_poll_suff_baseline_path) + crop_value_max_lost_aoi = hb.as_array(resampled_crop_value_max_lost_path) + crop_value_baseline_aoi = hb.as_array(resampled_crop_value_baseline_path) + + crop_value_pollinator_adjusted_baseline = calculate_pollinator_adjusted_value( + lulc_baseline, poll_suff_baseline, crop_value_max_lost_aoi, crop_value_baseline_aoi, base_year, sufficient_pollination_threshold, logger) + + hb.save_array_as_geotiff(crop_value_pollinator_adjusted_baseline, + os.path.join(output_dir, f'crop_value_pollinator_adjusted_{scenario_label}_{base_year}.tif'), + lulc_baseline_path, ndv=-9999, data_type=6) + + lulc_scenario = hb.as_array(lulc_scenario_path) + poll_suff_scenario = hb.as_array(resampled_poll_suff_scenario_path) + + crop_value_pollinator_adjusted_scenario = calculate_pollinator_adjusted_value( + lulc_scenario, poll_suff_scenario, crop_value_max_lost_aoi, crop_value_baseline_aoi, year, sufficient_pollination_threshold, logger) + + hb.save_array_as_geotiff(crop_value_pollinator_adjusted_scenario, crop_value_pollinator_adjusted_output_path, lulc_scenario_path, ndv=-9999, data_type=6) + + # Step 8: Calculate Shock Value + logger.info('Calculating shock value') + shock = 1 - ((np.sum(crop_value_pollinator_adjusted_scenario) - np.sum(crop_value_pollinator_adjusted_baseline)) / np.sum(crop_value_pollinator_adjusted_baseline)) + logger.info(f'Shock value for scenario {scenario_label} (baseline: {lulc_baseline_path}, scenario: {lulc_scenario_path}): {shock}') + + # Step 9: Append Shock Value to CSV File + with open(shock_value_path, 'a', newline='') as shock_file: + writer = csv.writer(shock_file) + 
writer.writerow([scenario_label, shock, year, lulc_baseline_path, lulc_scenario_path]) + + return shock + + + +# import os +# import hazelbean as hb +# import numpy as np +# import pandas as pd +# import logging +# import pygeoprocessing + +# def resample_raster(src, dest, pixel_size, projection_wkt, bounding_box): +# pygeoprocessing.align_and_resize_raster_stack( +# [src], [dest], ['near'], pixel_size, +# target_projection_wkt=projection_wkt, +# bounding_box_mode=bounding_box +# ) + +# def calculate_pollinator_adjusted_value(lulc, poll_suff, crop_value_max_lost, crop_value_baseline, year, sufficient_pollination_threshold, logger): +# logger.info(f'Calculating crop value adjusted for pollination sufficiency in {year}') +# return np.where( +# (crop_value_max_lost > 0) & (poll_suff < sufficient_pollination_threshold) & (lulc == 2), +# crop_value_baseline - crop_value_max_lost * (1 - (1 / sufficient_pollination_threshold) * poll_suff), +# np.where( +# (crop_value_max_lost > 0) & (poll_suff >= sufficient_pollination_threshold) & (lulc == 2), +# crop_value_baseline, +# -9999. 
+# ) +# ) + +# def calculate_crop_value_and_shock(lulc_2017_path, lulc_2022_path, poll_suff_2017_path, poll_suff_2022_path, crop_data_dir, pollination_dependence_spreadsheet_input_path, output_dir,shock_value_path): +# # Setup logging +# logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') +# logger = logging.getLogger() + +# # Create output paths +# crop_value_difference_path = os.path.join(output_dir, 'crop_value_difference_from_baseline_to_2022.tif') +# crop_value_pollinator_adjusted_output_path = os.path.join(output_dir, 'crop_value_pollinator_adjusted_2022.tif') + +# # Threshold for sufficient pollination +# sufficient_pollination_threshold = 0.3 + +# # Step 1: Load crop dependence data +# df_dependence = pd.read_excel(pollination_dependence_spreadsheet_input_path, sheet_name='Crop nutrient content') +# crop_names = list(df_dependence['Crop map file name'])[:-3] # Removing the last 3 crops that don't have matching production data +# pollination_dependence = list(df_dependence['poll.dep']) + +# # Step 2: Initialize arrays for calculations +# ref_raster = os.path.join(crop_data_dir, 'apple', 'apple_Production.tif') +# ha_shape = hb.get_shape_from_dataset_path(ref_raster) +# crop_value_baseline = np.zeros(ha_shape) +# crop_value_no_pollination = np.zeros(ha_shape) + +# # Step 3: Calculate baseline crop value and no-pollination value +# for c, crop_name in enumerate(crop_names): +# logger.info(f'Calculating crop value for {crop_name} with pollination dependence {pollination_dependence[c]}') + +# # Load crop production data +# crop_yield_path = os.path.join(crop_data_dir, f'{crop_name}', f'{crop_name}_Production.tif') +# crop_yield = hb.as_array(crop_yield_path) +# crop_yield = np.where(crop_yield > 0, crop_yield, 0.0) + +# # Calculate value (only based on production) +# crop_value_baseline += crop_yield +# crop_value_no_pollination += crop_yield * (1 - float(pollination_dependence[c])) + +# # Step 4: Calculate maximum loss due to pollination 
+# crop_value_max_lost = crop_value_baseline - crop_value_no_pollination + +# crop_value_baseline_path = os.path.join(output_dir, 'crop_value_baseline.tif') +# crop_value_max_lost_path = os.path.join(output_dir, 'crop_value_max_lost.tif') + +# # Step 5: Save the baseline crop values +# hb.save_array_as_geotiff(crop_value_baseline, crop_value_baseline_path, ref_raster, ndv=-9999, data_type=6) +# hb.save_array_as_geotiff(crop_value_max_lost, crop_value_max_lost_path, ref_raster, ndv=-9999, data_type=6) + +# # Step 6: Resample Rasters +# raster_info = pygeoprocessing.get_raster_info(lulc_2022_path) +# target_pixel_size = raster_info['pixel_size'] +# target_projection_wkt = raster_info['projection_wkt'] +# target_bb = raster_info['bounding_box'] + +# resampled_crop_value_baseline_path = os.path.join(output_dir, 'resampled_crop_value_baseline.tif') +# resampled_crop_value_max_lost_path = os.path.join(output_dir, 'resampled_crop_value_max_lost.tif') +# resampled_poll_suff_2017_path = os.path.join(output_dir, 'resampled_poll_suff_2017.tif') +# resampled_poll_suff_2022_path = os.path.join(output_dir, 'resampled_poll_suff_2022.tif') +# resampled_lulc_2017_path = os.path.join(output_dir, 'resampled_lulc_2017.tif') + +# resample_raster(crop_value_baseline_path, resampled_crop_value_baseline_path, target_pixel_size, target_projection_wkt, target_bb) +# resample_raster(crop_value_max_lost_path, resampled_crop_value_max_lost_path, target_pixel_size, target_projection_wkt, target_bb) +# resample_raster(poll_suff_2017_path, resampled_poll_suff_2017_path, target_pixel_size, target_projection_wkt, target_bb) +# resample_raster(poll_suff_2022_path, resampled_poll_suff_2022_path, target_pixel_size, target_projection_wkt, target_bb) +# resample_raster(lulc_2017_path, resampled_lulc_2017_path, target_pixel_size, target_projection_wkt, target_bb) + +# # Step 7: Calculate Crop Value Adjusted for Pollination Sufficiency in 2017 and 2022 +# lulc_2017 = hb.as_array(resampled_lulc_2017_path) 
+# poll_suff_2017 = hb.as_array(resampled_poll_suff_2017_path) +# crop_value_max_lost_aoi = hb.as_array(resampled_crop_value_max_lost_path) +# crop_value_baseline_aoi = hb.as_array(resampled_crop_value_baseline_path) + +# crop_value_pollinator_adjusted_2017 = calculate_pollinator_adjusted_value( +# lulc_2017, poll_suff_2017, crop_value_max_lost_aoi, crop_value_baseline_aoi, 2017, sufficient_pollination_threshold, logger) + +# hb.save_array_as_geotiff(crop_value_pollinator_adjusted_2017, os.path.join(output_dir, 'crop_value_pollinator_adjusted_2017.tif'), lulc_2017_path, ndv=-9999, data_type=6) + +# lulc_2022 = hb.as_array(lulc_2022_path) +# poll_suff_2022 = hb.as_array(resampled_poll_suff_2022_path) + +# crop_value_pollinator_adjusted_2022 = calculate_pollinator_adjusted_value( +# lulc_2022, poll_suff_2022, crop_value_max_lost_aoi, crop_value_baseline_aoi, 2022, sufficient_pollination_threshold, logger) + +# hb.save_array_as_geotiff(crop_value_pollinator_adjusted_2022, crop_value_pollinator_adjusted_output_path, lulc_2022_path, ndv=-9999, data_type=6) + +# # Step 8: Calculate Shock Value +# logger.info('Calculating shock value') +# shock = 1- ((np.sum(crop_value_pollinator_adjusted_2022) - np.sum(crop_value_pollinator_adjusted_2017)) / np.sum(crop_value_pollinator_adjusted_2017)) +# logger.info(f'Shock value: {shock}') + +# return shock + +# # Save shock value to a text file +# # shock_value_path = os.path.join(output_dir, 'shock_value.txt') +# with open(shock_value_path, 'w') as shock_file: +# shock_file.write(f'Shock value: {shock}') + From 1f373628bbe7b8dec685c9e255c2bc8199813b2a Mon Sep 17 00:00:00 2001 From: Saleh Mamun Date: Fri, 15 Nov 2024 17:01:28 -0600 Subject: [PATCH 3/7] Added a local version of InVEST SDR model This version of InVEST SDR is modified such that it has light computation compared to InVEST SDR model. I did not run the whole SDR model because we only use usle output from SDR. Running a low computation version makes sense to save time. 
--- scripts/sdr_local.py | 1545 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1545 insertions(+) create mode 100644 scripts/sdr_local.py diff --git a/scripts/sdr_local.py b/scripts/sdr_local.py new file mode 100644 index 0000000..211ecac --- /dev/null +++ b/scripts/sdr_local.py @@ -0,0 +1,1545 @@ +"""InVEST Sediment Delivery Ratio (SDR) module. + +The SDR method in this model is based on: + Winchell, M. F., et al. "Extension and validation of a geographic + information system-based method for calculating the Revised Universal + Soil Loss Equation length-slope factor for erosion risk assessments in + large watersheds." Journal of Soil and Water Conservation 63.3 (2008): + 105-111. +""" +import logging +import os + +import numpy +import pygeoprocessing +import pygeoprocessing.routing +import taskgraph +from osgeo import gdal +from osgeo import ogr + +from natcap.invest import gettext +from natcap.invest import spec_utils +from natcap.invest import utils +from natcap.invest import validation +from natcap.invest.model_metadata import MODEL_METADATA +from natcap.invest.unit_registry import u +from natcap.invest.sdr import sdr_core + +LOGGER = logging.getLogger(__name__) + +MODEL_SPEC = { + "model_name": MODEL_METADATA["sdr"].model_title, + "pyname": MODEL_METADATA["sdr"].pyname, + "userguide": MODEL_METADATA["sdr"].userguide, + "args_with_spatial_overlap": { + "spatial_keys": ["dem_path", "erosivity_path", "erodibility_path", + "lulc_path", "drainage_path", "watersheds_path", ], + "different_projections_ok": False, + }, + "args": { + "workspace_dir": spec_utils.WORKSPACE, + "results_suffix": spec_utils.SUFFIX, + "n_workers": spec_utils.N_WORKERS, + "dem_path": { + **spec_utils.DEM, + "projected": True + }, + "erosivity_path": { + "type": "raster", + "bands": {1: { + "type": "number", + "units": u.megajoule*u.millimeter/(u.hectare*u.hour*u.year)}}, + "projected": True, + "about": gettext( + "Map of rainfall erosivity, reflecting the intensity and " + 
"duration of rainfall in the area of interest."), + "name": gettext("erosivity") + }, + "erodibility_path": { + "type": "raster", + "bands": {1: { + "type": "number", + "units": u.metric_ton*u.hectare*u.hour/(u.hectare*u.megajoule*u.millimeter)}}, + "projected": True, + "about": gettext( + "Map of soil erodibility, the susceptibility of soil " + "particles to detachment and transport by rainfall and " + "runoff."), + "name": gettext("soil erodibility") + }, + "lulc_path": { + **spec_utils.LULC, + "projected": True, + "about": spec_utils.LULC['about'] + " " + gettext( + "All values in this raster must " + "have corresponding entries in the Biophysical Table.") + }, + "watersheds_path": { + "type": "vector", + "geometries": spec_utils.POLYGONS, + "projected": True, + "fields": {}, + "about": gettext( + "Map of the boundaries of the watershed(s) over which to " + "aggregate results. Each watershed should contribute to a " + "point of interest where water quality will be analyzed."), + "name": gettext("Watersheds") + }, + "biophysical_table_path": { + "type": "csv", + "index_col": "lucode", + "columns": { + "lucode": spec_utils.LULC_TABLE_COLUMN, + "usle_c": { + "type": "ratio", + "about": gettext("Cover-management factor for the USLE")}, + "usle_p": { + "type": "ratio", + "about": gettext("Support practice factor for the USLE")} + }, + "about": gettext( + "A table mapping each LULC code to biophysical properties of " + "that LULC class. 
All values in the LULC raster must have " + "corresponding entries in this table."), + "name": gettext("biophysical table") + }, + "threshold_flow_accumulation": spec_utils.THRESHOLD_FLOW_ACCUMULATION, + "k_param": { + "type": "number", + "units": u.none, + "about": gettext("Borselli k parameter."), + "name": gettext("Borselli k parameter") + }, + "sdr_max": { + "type": "ratio", + "about": gettext("The maximum SDR value that a pixel can have."), + "name": gettext("maximum SDR value") + }, + "ic_0_param": { + "type": "number", + "units": u.none, + "about": gettext("Borselli IC0 parameter."), + "name": gettext("Borselli IC0 parameter") + }, + "l_max": { + "type": "number", + "expression": "value > 0", + "units": u.none, + "about": gettext( + "The maximum allowed value of the slope length parameter (L) " + "in the LS factor."), + "name": gettext("maximum l value"), + }, + "drainage_path": { + "type": "raster", + "bands": {1: {"type": "integer"}}, + "required": False, + "about": gettext( + "Map of locations of artificial drainages that drain to the " + "watershed. Pixels with 1 are drainages and are treated like " + "streams. Pixels with 0 are not drainages."), + "name": gettext("drainages") + } + }, + "outputs": { + "avoided_erosion.tif": { + "about": "The contribution of vegetation to keeping soil from eroding from each pixel. (Eq. (82))", + "bands": {1: { + "type": "number", + "units": u.metric_ton/u.pixel + }} + }, + "avoided_export.tif": { + "about": "The contribution of vegetation to keeping erosion from entering a stream. This combines local/on-pixel sediment retention with trapping of erosion from upslope of the pixel. (Eq. (83))", + "bands": {1: { + "type": "number", + "units": u.metric_ton/u.pixel + }} + }, + "rkls.tif": { + "bands": {1: { + "type": "number", + "units": u.metric_ton/u.pixel + }}, + "about": "Total potential soil loss per pixel in the original land cover from the RKLS equation. Equivalent to the soil loss for bare soil. (Eq. 
(68), without applying the C or P factors)." + }, + "sed_deposition.tif": { + "about": "The total amount of sediment deposited on the pixel from upslope sources as a result of trapping. (Eq. (80))", + "bands": {1: { + "type": "number", + "units": u.metric_ton/u.pixel + }} + }, + "sed_export.tif": { + "about": "The total amount of sediment exported from each pixel that reaches the stream. (Eq. (76))", + "bands": {1: { + "type": "number", + "units": u.metric_ton/u.pixel + }} + }, + "stream.tif": spec_utils.STREAM, + "stream_and_drainage.tif": { + "created_if": "drainage_path", + "about": "This raster is the union of that layer with the calculated stream layer(Eq. (85)). Values of 1 represent streams, values of 0 are non-stream pixels.", + "bands": {1: {"type": "integer"}} + }, + "usle.tif": { + "about": "Total potential soil loss per pixel in the original land cover calculated from the USLE equation. (Eq. (68))", + "bands": {1: { + "type": "number", + "units": u.metric_ton/u.pixel + }} + }, + "watershed_results_sdr.shp": { + "about": "Table containing biophysical values for each watershed", + "geometries": spec_utils.POLYGONS, + "fields": { + "sed_export": { + "type": "number", + "units": u.metric_ton, + "about": "Total amount of sediment exported to the stream per watershed. (Eq. (77) with sum calculated over the watershed area)" + }, + "usle_tot": { + "type": "number", + "units": u.metric_ton, + "about": "Total amount of potential soil loss in each watershed calculated by the USLE equation. (Sum of USLE from (68) over the watershed area)" + }, + "avoid_exp": { + "type": "number", + "units": u.metric_ton, + "about": "The sum of avoided export in the watershed." 
+ }, + "avoid_eros": { + "type": "number", + "units": u.metric_ton, + "about": "The sum of avoided local erosion in the watershed" + }, + "sed_dep": { + "type": "number", + "units": u.metric_ton, + "about": "Total amount of sediment deposited on the landscape in each watershed, which does not enter the stream." + } + } + }, + "intermediate_outputs": { + "type": "directory", + "contents": { + "cp.tif": { + "about": gettext( + "CP factor derived by mapping usle_c and usle_p from " + "the biophysical table to the LULC raster."), + "bands": {1: {"type": "ratio"}} + }, + "d_dn.tif": { + "about": gettext( + "Downslope factor of the index of connectivity (Eq. (74))"), + "bands": {1: {"type": "number", "units": u.none}} + }, + "d_up.tif": { + "about": gettext( + "Upslope factor of the index of connectivity (Eq. (73))"), + "bands": {1: {"type": "number", "units": u.none}} + }, + "e_prime.tif": { + "about": gettext( + "Sediment downslope deposition, the amount of sediment " + "from a given pixel that does not reach a stream (Eq. (78))"), + "bands": {1: { + "type": "number", + "units": u.metric_ton/(u.hectare*u.year) + }} + }, + "f.tif": { + "about": gettext( + "Map of sediment flux for sediment that does not " + "reach the stream (Eq. (81))"), + "bands": {1: { + "type": "number", + "units": u.metric_ton/(u.hectare*u.year) + }} + }, + "flow_accumulation.tif": spec_utils.FLOW_ACCUMULATION, + "flow_direction.tif": spec_utils.FLOW_DIRECTION, + "ic.tif": { + "about": gettext("Index of connectivity (Eq. (70))"), + "bands": {1: { + "type": "number", + "units": u.none + }} + }, + "ls.tif": { + "about": gettext("LS factor for USLE (Eq. (69))"), + "bands": {1: { + "type": "number", + "units": u.none + }} + }, + "pit_filled_dem.tif": spec_utils.FILLED_DEM, + "s_accumulation.tif": { + "about": gettext( + "Flow accumulation weighted by the thresholded slope. 
" + "Used in calculating s_bar."), + "bands": {1: { + "type": "number", + "units": u.none + }} + }, + "s_bar.tif": { + "about": gettext( + "Mean thresholded slope gradient of the upslope " + "contributing area (in eq. (73))"), + "bands": {1: { + "type": "number", + "units": u.none + }} + }, + "sdr_factor.tif": { + "about": gettext("Sediment delivery ratio (Eq. (75))"), + "bands": {1: {"type": "ratio"}} + }, + "slope.tif": spec_utils.SLOPE, + "slope_threshold.tif": { + "about": gettext( + "Percent slope, thresholded to be no less than 0.005 " + "and no greater than 1 (eq. (71)). 1 is equivalent to " + "a 45 degree slope."), + "bands": {1: {"type": "ratio"}} + }, + "w_accumulation.tif": { + "about": gettext( + "Flow accumulation weighted by the thresholded " + "cover-management factor. Used in calculating w_bar."), + "bands": {1: { + "type": "number", + "units": u.none + }} + }, + "w_bar.tif": { + "about": gettext( + "Mean thresholded cover-management factor for upslope " + "contributing area (in eq. (73))"), + "bands": {1: {"type": "ratio"}} + }, + "w.tif": { + "about": gettext( + "Cover-management factor derived by mapping usle_c " + "from the biophysical table to the LULC raster."), + "bands": {1: {"type": "ratio"}} + }, + "w_threshold.tif": { + "about": gettext( + "Cover-management factor thresholded to be no less " + "than 0.001 (eq. (72))"), + "bands": {1: {"type": "ratio"}} + }, + "weighted_avg_aspect.tif": { + "about": gettext( + "Average aspect weighted by flow direction (in eq. (69))"), + "bands": {1: {"type": "number", "units": u.none}} + }, + "what_drains_to_stream.tif": { + "about": gettext( + "Map of which pixels drain to a stream. A value of " + "1 means that at least some of the runoff from that " + "pixel drains to a stream in stream.tif. 
A value of 0 " + "means that it does not drain at all to any stream " + "in stream.tif."), + "bands": {1: {"type": "integer"}} + }, + "ws_inverse.tif": { + "about": gettext( + "Inverse of the thresholded cover-management factor " + "times the thresholded slope (in eq. (74))"), + "bands": {1: {"type": "ratio"}} + }, + "aligned_dem.tif": { + "about": gettext( + "Copy of the input DEM, clipped to the extent " + "of the other raster inputs."), + "bands": {1: { + "type": "number", + "units": u.meter + }} + }, + "aligned_drainage.tif": { + "about": gettext( + "Copy of the input drainage map, clipped to " + "the extent of the other raster inputs and " + "aligned to the DEM."), + "bands": {1: {"type": "integer"}}, + }, + "aligned_erodibility.tif": { + "about": gettext( + "Copy of the input erodibility map, clipped to " + "the extent of the other raster inputs and " + "aligned to the DEM."), + "bands": {1: { + "type": "number", + "units": u.metric_ton*u.hectare*u.hour/(u.hectare*u.megajoule*u.millimeter) + }} + }, + "aligned_erosivity.tif": { + "about": gettext( + "Copy of the input erosivity map, clipped to " + "the extent of the other raster inputs and " + "aligned to the DEM."), + "bands": {1: { + "type": "number", + "units": u.megajoule*u.millimeter/(u.hectare*u.hour*u.year) + }} + }, + "aligned_lulc.tif": { + "about": gettext( + "Copy of the input Land Use Land Cover map, clipped to " + "the extent of the other raster inputs and " + "aligned to the DEM."), + "bands": {1: {"type": "integer"}}, + }, + "mask.tif": { + "about": gettext( + "A raster aligned to the DEM and clipped to the " + "extent of the other raster inputs. Pixel values " + "indicate where a nodata value exists in the stack " + "of aligned rasters (pixel value of 0), or if all " + "values in the stack of rasters at this pixel " + "location are valid."), + "bands": {1: {"type": "integer"}}, + }, + "masked_dem.tif": { + "about": gettext( + "A copy of the aligned DEM, masked using the mask raster." 
+ ), + "bands": {1: { + "type": "number", + "units": u.meter}}, + }, + "masked_drainage.tif": { + "about": gettext( + "A copy of the aligned drainage map, masked using the " + "mask raster." + ), + "bands": {1: {"type": "integer"}} + }, + "masked_erodibility.tif": { + "about": gettext( + "A copy of the aligned erodibility map, masked using " + "the mask raster." + ), + "bands": {1: { + "type": "number", + "units": u.metric_ton*u.hectare*u.hour/(u.hectare*u.megajoule*u.millimeter) + }}, + }, + "masked_erosivity.tif": { + "about": gettext( + "A copy of the aligned erosivity map, masked using " + "the mask raster."), + "bands": {1: { + "type": "number", + "units": u.megajoule*u.millimeter/(u.hectare*u.hour*u.year) + }}, + }, + "masked_lulc.tif": { + "about": gettext( + "A copy of the aligned Land Use Land Cover map, " + "masked using the mask raster."), + "bands": {1: {"type": "integer"}}, + } + }, + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR + } +} + +_OUTPUT_BASE_FILES = { + 'rkls_path': 'rkls.tif', + 'sed_export_path': 'sed_export.tif', + 'sed_deposition_path': 'sed_deposition.tif', + 'stream_and_drainage_path': 'stream_and_drainage.tif', + 'stream_path': 'stream.tif', + 'usle_path': 'usle.tif', + 'watershed_results_sdr_path': 'watershed_results_sdr.shp', + 'avoided_export_path': 'avoided_export.tif', + 'avoided_erosion_path': 'avoided_erosion.tif', +} + +INTERMEDIATE_DIR_NAME = 'intermediate_outputs' + +_INTERMEDIATE_BASE_FILES = { + 'aligned_dem_path': 'aligned_dem.tif', + 'aligned_drainage_path': 'aligned_drainage.tif', + 'aligned_erodibility_path': 'aligned_erodibility.tif', + 'aligned_erosivity_path': 'aligned_erosivity.tif', + 'aligned_lulc_path': 'aligned_lulc.tif', + 'mask_path': 'mask.tif', + 'masked_dem_path': 'masked_dem.tif', + 'masked_drainage_path': 'masked_drainage.tif', + 'masked_erodibility_path': 'masked_erodibility.tif', + 'masked_erosivity_path': 'masked_erosivity.tif', + 'masked_lulc_path': 'masked_lulc.tif', + 'cp_factor_path': 'cp.tif', 
+ 'd_dn_path': 'd_dn.tif', + 'd_up_path': 'd_up.tif', + 'f_path': 'f.tif', + 'flow_accumulation_path': 'flow_accumulation.tif', + 'flow_direction_path': 'flow_direction.tif', + 'ic_path': 'ic.tif', + 'ls_path': 'ls.tif', + 'pit_filled_dem_path': 'pit_filled_dem.tif', + 's_accumulation_path': 's_accumulation.tif', + 's_bar_path': 's_bar.tif', + 'sdr_path': 'sdr_factor.tif', + 'slope_path': 'slope.tif', + 'thresholded_slope_path': 'slope_threshold.tif', + 'thresholded_w_path': 'w_threshold.tif', + 'w_accumulation_path': 'w_accumulation.tif', + 'w_bar_path': 'w_bar.tif', + 'w_path': 'w.tif', + 'ws_inverse_path': 'ws_inverse.tif', + 'e_prime_path': 'e_prime.tif', + 'drainage_mask': 'what_drains_to_stream.tif', +} + + +# Target nodata is for general rasters that are positive, and _IC_NODATA are +# for rasters that are any range +_TARGET_NODATA = -1.0 +_BYTE_NODATA = 255 +_IC_NODATA = float(numpy.finfo('float32').min) + + +def execute(args): + """Sediment Delivery Ratio. + + This function calculates the sediment export and retention of a landscape + using the sediment delivery ratio model described in the InVEST user's + guide. + + Args: + args['workspace_dir'] (string): output directory for intermediate, + temporary, and final files + args['results_suffix'] (string): (optional) string to append to any + output file names + args['dem_path'] (string): path to a digital elevation raster + args['erosivity_path'] (string): path to rainfall erosivity index + raster + args['erodibility_path'] (string): a path to soil erodibility raster + args['lulc_path'] (string): path to land use/land cover raster + args['watersheds_path'] (string): path to vector of the watersheds + args['biophysical_table_path'] (string): path to CSV file with + biophysical information of each land use classes. contain the + fields 'usle_c' and 'usle_p' + args['threshold_flow_accumulation'] (number): number of upslope pixels + on the dem to threshold to a stream. 
+ args['k_param'] (number): k calibration parameter + args['sdr_max'] (number): max value the SDR + args['ic_0_param'] (number): ic_0 calibration parameter + args['drainage_path'] (string): (optional) path to drainage raster that + is used to add additional drainage areas to the internally + calculated stream layer + args['l_max'] (number): the maximum allowed value of the slope length + parameter (L) in the LS factor. If the calculated value of L + exceeds 'l_max' it will be clamped to this value. + args['n_workers'] (int): if present, indicates how many worker + processes should be used in parallel processing. -1 indicates + single process mode, 0 is single process but non-blocking mode, + and >= 1 is number of processes. + + Returns: + None. + + """ + file_suffix = utils.make_suffix_string(args, 'results_suffix') + biophysical_df = validation.get_validated_dataframe( + args['biophysical_table_path'], + **MODEL_SPEC['args']['biophysical_table_path']) + + # Test to see if c or p values are outside of 0..1 + for key in ['usle_c', 'usle_p']: + for lulc_code, row in biophysical_df.iterrows(): + if row[key] < 0 or row[key] > 1: + raise ValueError( + f'A value in the biophysical table is not a number ' + f'within range 0..1. The offending value is in ' + f'column "{key}", lucode row "{lulc_code}", ' + f'and has value "{row[key]}"') + + intermediate_output_dir = os.path.join( + args['workspace_dir'], INTERMEDIATE_DIR_NAME) + output_dir = os.path.join(args['workspace_dir']) + utils.make_directories([output_dir, intermediate_output_dir]) + + f_reg = utils.build_file_registry( + [(_OUTPUT_BASE_FILES, output_dir), + (_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix) + + try: + n_workers = int(args['n_workers']) + except (KeyError, ValueError, TypeError): + # KeyError when n_workers is not present in args + # ValueError when n_workers is an empty string. + # TypeError when n_workers is None. + n_workers = -1 # Synchronous mode. 
+ task_graph = taskgraph.TaskGraph( + os.path.join(output_dir, 'taskgraph_cache'), + n_workers, reporting_interval=5.0) + + base_list = [] + aligned_list = [] + masked_list = [] + input_raster_key_list = ['dem', 'lulc', 'erosivity', 'erodibility'] + for file_key in input_raster_key_list: + base_list.append(args[f"{file_key}_path"]) + aligned_list.append(f_reg[f"aligned_{file_key}_path"]) + masked_list.append(f_reg[f"masked_{file_key}_path"]) + # all continuous rasters can use bilinear, but lulc should be mode + interpolation_list = ['bilinear', 'mode', 'bilinear', 'bilinear'] + + drainage_present = False + if 'drainage_path' in args and args['drainage_path'] != '': + drainage_present = True + input_raster_key_list.append('drainage') + base_list.append(args['drainage_path']) + aligned_list.append(f_reg['aligned_drainage_path']) + masked_list.append(f_reg['masked_drainage_path']) + interpolation_list.append('near') + + dem_raster_info = pygeoprocessing.get_raster_info(args['dem_path']) + min_pixel_size = numpy.min(numpy.abs(dem_raster_info['pixel_size'])) + target_pixel_size = (min_pixel_size, -min_pixel_size) + + target_sr_wkt = dem_raster_info['projection_wkt'] + vector_mask_options = {'mask_vector_path': args['watersheds_path']} + align_task = task_graph.add_task( + func=pygeoprocessing.align_and_resize_raster_stack, + args=( + base_list, aligned_list, interpolation_list, + target_pixel_size, 'intersection'), + kwargs={ + 'target_projection_wkt': target_sr_wkt, + 'base_vector_path_list': (args['watersheds_path'],), + 'raster_align_index': 0, + 'vector_mask_options': vector_mask_options, + }, + target_path_list=aligned_list, + task_name='align input rasters') + + mutual_mask_task = task_graph.add_task( + func=pygeoprocessing.raster_map, + kwargs={ + 'op': _create_mutual_mask_op, + 'rasters': aligned_list, + 'target_path': f_reg['mask_path'], + 'target_nodata': 0, + }, + target_path_list=[f_reg['mask_path']], + dependent_task_list=[align_task], + task_name='create 
mask') + + mask_tasks = {} # use a dict so we can put these in a loop + for key, aligned_path, masked_path in zip(input_raster_key_list, + aligned_list, masked_list): + mask_tasks[f"masked_{key}"] = task_graph.add_task( + func=pygeoprocessing.raster_map, + kwargs={ + 'op': _mask_single_raster_op, + 'rasters': [aligned_path, f_reg['mask_path']], + 'target_path': masked_path, + }, + target_path_list=[masked_path], + dependent_task_list=[mutual_mask_task, align_task], + task_name=f'mask {key}') + + pit_fill_task = task_graph.add_task( + func=pygeoprocessing.routing.fill_pits, + args=( + (f_reg['masked_dem_path'], 1), + f_reg['pit_filled_dem_path']), + target_path_list=[f_reg['pit_filled_dem_path']], + dependent_task_list=[mask_tasks['masked_dem']], + task_name='fill pits') + + slope_task = task_graph.add_task( + func=pygeoprocessing.calculate_slope, + args=( + (f_reg['pit_filled_dem_path'], 1), + f_reg['slope_path']), + dependent_task_list=[pit_fill_task], + target_path_list=[f_reg['slope_path']], + task_name='calculate slope') + + threshold_slope_task = task_graph.add_task( + func=pygeoprocessing.raster_map, + kwargs=dict( + op=threshold_slope_op, + rasters=[f_reg['slope_path']], + target_path=f_reg['thresholded_slope_path']), + target_path_list=[f_reg['thresholded_slope_path']], + dependent_task_list=[slope_task], + task_name='threshold slope') + + flow_dir_task = task_graph.add_task( + func=pygeoprocessing.routing.flow_dir_mfd, + args=( + (f_reg['pit_filled_dem_path'], 1), + f_reg['flow_direction_path']), + target_path_list=[f_reg['flow_direction_path']], + dependent_task_list=[pit_fill_task], + task_name='flow direction calculation') + + flow_accumulation_task = task_graph.add_task( + func=pygeoprocessing.routing.flow_accumulation_mfd, + args=( + (f_reg['flow_direction_path'], 1), + f_reg['flow_accumulation_path']), + target_path_list=[f_reg['flow_accumulation_path']], + dependent_task_list=[flow_dir_task], + task_name='flow accumulation calculation') + + 
ls_factor_task = task_graph.add_task( + func=_calculate_ls_factor, + args=( + f_reg['flow_accumulation_path'], + f_reg['slope_path'], + float(args['l_max']), + f_reg['ls_path']), + target_path_list=[f_reg['ls_path']], + dependent_task_list=[ + flow_accumulation_task, slope_task], + task_name='ls factor calculation') + + stream_task = task_graph.add_task( + func=pygeoprocessing.routing.extract_streams_mfd, + args=( + (f_reg['flow_accumulation_path'], 1), + (f_reg['flow_direction_path'], 1), + float(args['threshold_flow_accumulation']), + f_reg['stream_path']), + kwargs={'trace_threshold_proportion': 0.7}, + target_path_list=[f_reg['stream_path']], + dependent_task_list=[flow_accumulation_task], + task_name='extract streams') + + if drainage_present: + drainage_task = task_graph.add_task( + func=pygeoprocessing.raster_map, + kwargs=dict( + op=add_drainage_op, + rasters=[f_reg['stream_path'], f_reg['masked_drainage_path']], + target_path=f_reg['stream_and_drainage_path'], + target_dtype=numpy.uint8), + target_path_list=[f_reg['stream_and_drainage_path']], + dependent_task_list=[stream_task, mask_tasks['masked_drainage']], + task_name='add drainage') + drainage_raster_path_task = ( + f_reg['stream_and_drainage_path'], drainage_task) + else: + drainage_raster_path_task = ( + f_reg['stream_path'], stream_task) + + lulc_to_c = biophysical_df['usle_c'].to_dict() + threshold_w_task = task_graph.add_task( + func=_calculate_w, + args=( + lulc_to_c, f_reg['masked_lulc_path'], f_reg['w_path'], + f_reg['thresholded_w_path']), + target_path_list=[f_reg['w_path'], f_reg['thresholded_w_path']], + dependent_task_list=[mask_tasks['masked_lulc']], + task_name='calculate W') + + lulc_to_cp = (biophysical_df['usle_c'] * biophysical_df['usle_p']).to_dict() + cp_task = task_graph.add_task( + func=_calculate_cp, + args=( + lulc_to_cp, f_reg['masked_lulc_path'], + f_reg['cp_factor_path']), + target_path_list=[f_reg['cp_factor_path']], + dependent_task_list=[mask_tasks['masked_lulc']], + 
task_name='calculate CP') + + rkls_task = task_graph.add_task( + func=_calculate_rkls, + args=( + f_reg['ls_path'], + f_reg['masked_erosivity_path'], + f_reg['masked_erodibility_path'], + drainage_raster_path_task[0], + f_reg['rkls_path']), + target_path_list=[f_reg['rkls_path']], + dependent_task_list=[ + mask_tasks['masked_erosivity'], mask_tasks['masked_erodibility'], + drainage_raster_path_task[1], ls_factor_task], + task_name='calculate RKLS') + + usle_task = task_graph.add_task( + func=pygeoprocessing.raster_map, + kwargs=dict( + op=usle_op, + rasters=[f_reg['rkls_path'], f_reg['cp_factor_path']], + target_path=f_reg['usle_path']), + target_path_list=[f_reg['usle_path']], + dependent_task_list=[rkls_task, cp_task], + task_name='calculate USLE') + + # bar_task_map = {} + # for factor_path, factor_task, accumulation_path, out_bar_path, bar_id in [ + # (f_reg['thresholded_w_path'], threshold_w_task, + # f_reg['w_accumulation_path'], + # f_reg['w_bar_path'], + # 'w_bar'), + # (f_reg['thresholded_slope_path'], threshold_slope_task, + # f_reg['s_accumulation_path'], + # f_reg['s_bar_path'], + # 's_bar')]: + # bar_task = task_graph.add_task( + # func=_calculate_bar_factor, + # args=( + # f_reg['flow_direction_path'], factor_path, + # f_reg['flow_accumulation_path'], + # accumulation_path, out_bar_path), + # target_path_list=[accumulation_path, out_bar_path], + # dependent_task_list=[ + # factor_task, flow_accumulation_task, flow_dir_task], + # task_name='calculate %s' % bar_id) + # bar_task_map[bar_id] = bar_task + + # d_up_task = task_graph.add_task( + # func=_calculate_d_up, + # args=( + # f_reg['w_bar_path'], f_reg['s_bar_path'], + # f_reg['flow_accumulation_path'], f_reg['d_up_path']), + # target_path_list=[f_reg['d_up_path']], + # dependent_task_list=[ + # bar_task_map['s_bar'], bar_task_map['w_bar'], + # flow_accumulation_task], + # task_name='calculate Dup') + + # inverse_ws_factor_task = task_graph.add_task( + # func=pygeoprocessing.raster_map, + # 
kwargs=dict( + # op=inverse_ws_op, + # rasters=[f_reg['thresholded_w_path'], + # f_reg['thresholded_slope_path']], + # target_path=f_reg['ws_inverse_path']), + # target_path_list=[f_reg['ws_inverse_path']], + # dependent_task_list=[threshold_slope_task, threshold_w_task], + # task_name='calculate inverse ws factor') + + # d_dn_task = task_graph.add_task( + # func=pygeoprocessing.routing.distance_to_channel_mfd, + # args=( + # (f_reg['flow_direction_path'], 1), + # (drainage_raster_path_task[0], 1), + # f_reg['d_dn_path']), + # kwargs={'weight_raster_path_band': (f_reg['ws_inverse_path'], 1)}, + # target_path_list=[f_reg['d_dn_path']], + # dependent_task_list=[ + # flow_dir_task, drainage_raster_path_task[1], + # inverse_ws_factor_task], + # task_name='calculating d_dn') + + # ic_task = task_graph.add_task( + # func=_calculate_ic, + # args=( + # f_reg['d_up_path'], f_reg['d_dn_path'], f_reg['ic_path']), + # target_path_list=[f_reg['ic_path']], + # dependent_task_list=[d_up_task, d_dn_task], + # task_name='calculate ic') + + # sdr_task = task_graph.add_task( + # func=_calculate_sdr, + # args=( + # float(args['k_param']), float(args['ic_0_param']), + # float(args['sdr_max']), f_reg['ic_path'], + # drainage_raster_path_task[0], f_reg['sdr_path']), + # target_path_list=[f_reg['sdr_path']], + # dependent_task_list=[ic_task], + # task_name='calculate sdr') + + # sed_export_task = task_graph.add_task( + # func=pygeoprocessing.raster_map, + # kwargs=dict( + # op=numpy.multiply, # export = USLE * SDR + # rasters=[f_reg['usle_path'], f_reg['sdr_path']], + # target_path=f_reg['sed_export_path']), + # target_path_list=[f_reg['sed_export_path']], + # dependent_task_list=[usle_task, sdr_task], + # task_name='calculate sed export') + + # e_prime_task = task_graph.add_task( + # func=_calculate_e_prime, + # args=( + # f_reg['usle_path'], f_reg['sdr_path'], + # drainage_raster_path_task[0], f_reg['e_prime_path']), + # target_path_list=[f_reg['e_prime_path']], + # 
dependent_task_list=[usle_task, sdr_task], + # task_name='calculate export prime') + + # sed_deposition_task = task_graph.add_task( + # func=sdr_core.calculate_sediment_deposition, + # args=( + # f_reg['flow_direction_path'], f_reg['e_prime_path'], + # f_reg['f_path'], f_reg['sdr_path'], + # f_reg['sed_deposition_path']), + # dependent_task_list=[e_prime_task, sdr_task, flow_dir_task], + # target_path_list=[f_reg['sed_deposition_path'], f_reg['f_path']], + # task_name='sediment deposition') + + # avoided_erosion_task = task_graph.add_task( + # func=pygeoprocessing.raster_map, + # kwargs=dict( + # op=numpy.subtract, # avoided erosion = rkls - usle + # rasters=[f_reg['rkls_path'], f_reg['usle_path']], + # target_path=f_reg['avoided_erosion_path']), + # dependent_task_list=[rkls_task, usle_task], + # target_path_list=[f_reg['avoided_erosion_path']], + # task_name='calculate avoided erosion') + + # avoided_export_task = task_graph.add_task( + # func=pygeoprocessing.raster_map, + # kwargs=dict( + # op=_avoided_export_op, + # rasters=[f_reg['avoided_erosion_path'], + # f_reg['sdr_path'], + # f_reg['sed_deposition_path']], + # target_path=f_reg['avoided_export_path']), + # dependent_task_list=[avoided_erosion_task, sdr_task, + # sed_deposition_task], + # target_path_list=[f_reg['avoided_export_path']], + # task_name='calculate total retention') + + # _ = task_graph.add_task( + # func=_calculate_what_drains_to_stream, + # args=(f_reg['flow_direction_path'], f_reg['d_dn_path'], + # f_reg['drainage_mask']), + # target_path_list=[f_reg['drainage_mask']], + # dependent_task_list=[flow_dir_task, d_dn_task], + # task_name='write mask of what drains to stream') + + # _ = task_graph.add_task( + # func=_generate_report, + # args=( + # args['watersheds_path'], f_reg['usle_path'], + # f_reg['sed_export_path'], f_reg['sed_deposition_path'], + # f_reg['avoided_export_path'], f_reg['avoided_erosion_path'], + # f_reg['watershed_results_sdr_path']), + # 
target_path_list=[f_reg['watershed_results_sdr_path']], + # dependent_task_list=[ + # usle_task, sed_export_task, avoided_export_task, + # sed_deposition_task, avoided_erosion_task], + # task_name='generate report') + + task_graph.close() + task_graph.join() + + +# raster_map op for building a mask where all pixels in the stack are valid. +def _create_mutual_mask_op(*arrays): return 1 + + +# raster_map op for using a mask raster to mask out another raster. +def _mask_single_raster_op(source_array, mask_array): return source_array + + +def _avoided_export_op(avoided_erosion, sdr, sed_deposition): + """raster_map equation: calculate total retention. + + Args: + avoided_erosion (numpy.array): Avoided erosion values. + sdr (numpy.array): SDR values. + sed_deposition (numpy.array): Sediment deposition values. + + Returns: + A ``numpy.array`` of computed total retention matching the shape of + the input numpy arrays. + """ + # avoided_erosion represents RLKS - RKLSCP (where RKLSCP is also + # known as a modified USLE) + return avoided_erosion * sdr + sed_deposition + +def add_drainage_op(stream, drainage): + """raster_map equation: add drainage mask to stream layer. + + Args: + stream (numpy.array): binary array where 1 indicates + a stream, and 0 is a valid landscape pixel but not a stream. + drainage (numpy.array): binary array where 1 indicates any water + reaching that pixel drains to a stream. + + Returns: + numpy.array combination of stream and drainage + """ + return numpy.where(drainage == 1, 1, stream) + +# raster_map equation: calculate USLE +def usle_op(rkls, cp_factor): return rkls * cp_factor + +# raster_map equation: calculate the inverse ws factor +def inverse_ws_op(w_factor, s_factor): return 1 / (w_factor * s_factor) + + +def _calculate_what_drains_to_stream( + flow_dir_mfd_path, dist_to_channel_mfd_path, target_mask_path): + """Create a mask indicating regions that do or do not drain to a stream. 
+ + This is useful because ``pygeoprocessing.distance_to_stream_mfd`` may leave + some unexpected regions as nodata if they do not drain to a stream. This + may be confusing behavior, so this mask is intended to locate what drains + to a stream and what does not. A pixel doesn't drain to a stream if it has + a defined flow direction but undefined distance to stream. + + Args: + flow_dir_mfd_path (string): The path to an MFD flow direction raster. + This raster must have a nodata value defined. + dist_to_channel_mfd_path (string): The path to an MFD + distance-to-channel raster. This raster must have a nodata value + defined. + target_mask_path (string): The path to where the mask raster should be + written. + + Returns: + ``None`` + """ + flow_dir_mfd_nodata = pygeoprocessing.get_raster_info( + flow_dir_mfd_path)['nodata'][0] + dist_to_channel_nodata = pygeoprocessing.get_raster_info( + dist_to_channel_mfd_path)['nodata'][0] + + def _what_drains_to_stream(flow_dir_mfd, dist_to_channel): + """Determine which pixels do and do not drain to a stream. + + Args: + flow_dir_mfd (numpy.array): A numpy array of MFD flow direction + values. + dist_to_channel (numpy.array): A numpy array of calculated + distances to the nearest channel. + + Returns: + A ``numpy.array`` of dtype ``numpy.uint8`` with pixels where: + + * ``255`` where ``flow_dir_mfd`` is nodata (and thus + ``dist_to_channel`` is also nodata). + * ``0`` where ``flow_dir_mfd`` has data and ``dist_to_channel`` + does not + * ``1`` where ``flow_dir_mfd`` has data, and + ``dist_to_channel`` also has data. 
+ """ + drains_to_stream = numpy.full( + flow_dir_mfd.shape, _BYTE_NODATA, dtype=numpy.uint8) + valid_flow_dir = ~pygeoprocessing.array_equals_nodata( + flow_dir_mfd, flow_dir_mfd_nodata) + valid_dist_to_channel = ( + ~pygeoprocessing.array_equals_nodata( + dist_to_channel, dist_to_channel_nodata) & + valid_flow_dir) + + # Nodata where both flow_dir and dist_to_channel are nodata + # 1 where flow_dir and dist_to_channel have values (drains to stream) + # 0 where flow_dir has data and dist_to_channel doesn't (doesn't drain) + drains_to_stream[valid_flow_dir & valid_dist_to_channel] = 1 + drains_to_stream[valid_flow_dir & ~valid_dist_to_channel] = 0 + return drains_to_stream + + pygeoprocessing.raster_calculator( + [(flow_dir_mfd_path, 1), (dist_to_channel_mfd_path, 1)], + _what_drains_to_stream, target_mask_path, gdal.GDT_Byte, _BYTE_NODATA) + + +def _calculate_ls_factor( + flow_accumulation_path, slope_path, l_max, + target_ls_factor_path): + """Calculate LS factor. + + Calculates the LS factor using Equation 3 from "Extension and + validation of a geographic information system-based method for calculating + the Revised Universal Soil Loss Equation length-slope factor for erosion + risk assessments in large watersheds". + + The equation for this is:: + + (upstream_area + pixel_area)^(m+1) - upstream_area^(m+1) + LS = S * -------------------------------------------------------- + (pixel_area^(m+2)) * aspect_dir * 22.13^(m) + + Where + + * ``S`` is the slope factor defined in equation 4 from the same paper, + calculated by the following where ``b`` is the slope in radians: + + * ``S = 10.8 * sin(b) + 0.03`` where slope < 9% + * ``S = 16.8 * sin(b) - 0.50`` where slope >= 9% + + * ``upstream_area`` is interpreted as the square root of the + catchment area, to match SAGA-GIS's method for calculating LS + Factor. + * ``pixel_area`` is the area of the pixel in square meters. 
+ * ``m`` is the slope-length exponent of the RUSLE LS-factor, + which, as discussed in Oliveira et al. 2013 is a function of the + on-pixel slope theta: + + * ``m = 0.2`` when ``theta <= 1%`` + * ``m = 0.3`` when ``1% < theta <= 3.5%`` + * ``m = 0.4`` when ``3.5% < theta <= 5%`` + * ``m = 0.5`` when ``5% < theta <= 9%`` + * ``m = (beta / (1+beta)`` when ``theta > 9%``, where + ``beta = (sin(theta) / 0.0896) / (3*sin(theta)^0.8 + 0.56)`` + + * ``aspect_dir`` is calculated by ``|sin(alpha)| + |cos(alpha)|`` + for the given pixel. + + Oliveira et al can be found at: + + Oliveira, A.H., Silva, M.A. da, Silva, M.L.N., Curi, N., Neto, G.K., + Freitas, D.A.F. de, 2013. Development of Topographic Factor Modeling + for Application in Soil Erosion Models, in: Intechopen (Ed.), Soil + Processes and Current Trends in Quality Assessment. p. 28. + + Args: + flow_accumulation_path (string): path to raster, pixel values are the + contributing upslope area at that cell. Pixel size is square. + slope_path (string): path to slope raster as a percent + l_max (float): if the calculated value of L exceeds this value + it is clamped to this value. + target_ls_factor_path (string): path to output ls_prime_factor + raster + + Returns: + None + + """ + cell_size = abs(pygeoprocessing.get_raster_info( + flow_accumulation_path)['pixel_size'][0]) + cell_area = cell_size ** 2 + + def ls_factor_function(percent_slope, flow_accumulation): + """Calculate the LS factor. + + Args: + percent_slope (numpy.ndarray): slope in percent + flow_accumulation (numpy.ndarray): upslope pixels + l_max (float): max L factor, clamp to this value if L exceeds it + + Returns: + ls_factor + + """ + # Although Desmet & Govers (1996) discusses "upstream contributing + # area", this is not strictly defined. We decided to use the square + # root of the upstream contributing area here as an estimate, which + # matches the SAGA LS Factor option "square root of catchment area". 
+ # See the InVEST ADR-0001 for more information. + # We subtract 1 from the flow accumulation because FA includes itself + # in its count of pixels upstream and our LS factor equation wants only + # those pixels that are strictly upstream. + contributing_area = numpy.sqrt((flow_accumulation - 1) * cell_area) + slope_in_radians = numpy.arctan(percent_slope / 100) + + aspect_length = (numpy.fabs(numpy.sin(slope_in_radians)) + + numpy.fabs(numpy.cos(slope_in_radians))) + + # From Equation 4 in "Extension and validation of a geographic + # information system ..." + slope_factor = numpy.where( + percent_slope < 9, + 10.8 * numpy.sin(slope_in_radians) + 0.03, + 16.8 * numpy.sin(slope_in_radians) - 0.5) + + beta = ( + (numpy.sin(slope_in_radians) / 0.0896) / + (3 * numpy.sin(slope_in_radians)**0.8 + 0.56)) + + # Set m value via lookup table: Table 1 in + # InVEST Sediment Model_modifications_10-01-2012_RS.docx + # note slope_table in percent + slope_table = numpy.array([1., 3.5, 5., 9.]) + m_table = numpy.array([0.2, 0.3, 0.4, 0.5]) + # mask where slopes are larger than lookup table + big_slope_mask = percent_slope > slope_table[-1] + m_indexes = numpy.digitize( + percent_slope[~big_slope_mask], slope_table, right=True) + m_exp = numpy.empty(big_slope_mask.shape, dtype=numpy.float32) + m_exp[big_slope_mask] = ( + beta[big_slope_mask] / (1 + beta[big_slope_mask])) + m_exp[~big_slope_mask] = m_table[m_indexes] + + l_factor = ( + ((contributing_area + cell_area)**(m_exp+1) - + contributing_area ** (m_exp+1)) / + ((cell_size ** (m_exp + 2)) * (aspect_length**m_exp) * + (22.13**m_exp))) + + # threshold L factor to l_max + l_factor[l_factor > l_max] = l_max + + return l_factor * slope_factor + + pygeoprocessing.raster_map( + op=ls_factor_function, + rasters=[slope_path, flow_accumulation_path], + target_path=target_ls_factor_path) + + +def _calculate_rkls( + ls_factor_path, erosivity_path, erodibility_path, stream_path, + rkls_path): + """Calculate potential soil loss (tons / 
(pixel * year)) using RKLS. + + (revised universal soil loss equation with no C or P). + + Args: + ls_factor_path (string): path to LS raster that has square pixels in + meter units. + erosivity_path (string): path to erosivity raster + (MJ * mm / (ha * hr * yr)) + erodibility_path (string): path to erodibility raster + (t * ha * hr / (MJ * ha * mm)) + stream_path (string): path to drainage raster + (1 is drainage, 0 is not) + rkls_path (string): path to RKLS raster + + Returns: + None + + """ + erosivity_nodata = pygeoprocessing.get_raster_info( + erosivity_path)['nodata'][0] + erodibility_nodata = pygeoprocessing.get_raster_info( + erodibility_path)['nodata'][0] + stream_nodata = pygeoprocessing.get_raster_info( + stream_path)['nodata'][0] + + cell_size = abs( + pygeoprocessing.get_raster_info(ls_factor_path)['pixel_size'][0]) + cell_area_ha = cell_size**2 / 10000.0 # hectares per pixel + + def rkls_function(ls_factor, erosivity, erodibility, stream): + """Calculate the RKLS equation. + + Args: + ls_factor (numpy.ndarray): length/slope factor. unitless. + erosivity (numpy.ndarray): related to peak rainfall events. units: + MJ * mm / (ha * hr * yr) + erodibility (numpy.ndarray): related to the potential for soil to + erode. 
units: t * ha * hr / (MJ * ha * mm)
+ stream (numpy.ndarray): stream mask (1 stream, 0 no stream)
+
+ Returns:
+ numpy.ndarray of RKLS values in tons / (pixel * year)
+ """
+ rkls = numpy.empty(ls_factor.shape, dtype=numpy.float32)
+ nodata_mask = (
+ ~pygeoprocessing.array_equals_nodata(ls_factor, _TARGET_NODATA) &
+ ~pygeoprocessing.array_equals_nodata(stream, stream_nodata))
+ if erosivity_nodata is not None:
+ nodata_mask &= ~pygeoprocessing.array_equals_nodata(
+ erosivity, erosivity_nodata)
+ if erodibility_nodata is not None:
+ nodata_mask &= ~pygeoprocessing.array_equals_nodata(
+ erodibility, erodibility_nodata)
+
+ valid_mask = nodata_mask & (stream == 0)
+ rkls[:] = _TARGET_NODATA
+
+ rkls[valid_mask] = ( # rkls units are tons / (pixel * year)
+ ls_factor[valid_mask] * # unitless
+ erosivity[valid_mask] * # MJ * mm / (ha * hr * yr)
+ erodibility[valid_mask] * # t * ha * hr / (MJ * ha * mm)
+ cell_area_ha) # ha / pixel
+ return rkls
+
+ # aligning with index 3 that's the stream and the most likely to be
+ # aligned with LULCs
+ pygeoprocessing.raster_calculator(
+ [(path, 1) for path in [
+ ls_factor_path, erosivity_path, erodibility_path, stream_path]],
+ rkls_function, rkls_path, gdal.GDT_Float32, _TARGET_NODATA)
+
+
+def threshold_slope_op(slope):
+ """raster_map equation: convert slope to m/m and clamp at 0.005 and 1.0.
+
+ As described in Cavalli et al., 2013.
+ """
+ slope_m = slope / 100
+ slope_m[slope_m < 0.005] = 0.005
+ slope_m[slope_m > 1] = 1
+ return slope_m
+
+
+def _calculate_w(
+ lulc_to_c, lulc_path, w_factor_path,
+ out_thresholded_w_factor_path):
+ """W factor: map C values from LULC and lower threshold to 0.001.
+
+ W is a factor in calculating d_up accumulation for SDR. 
+ + Args: + lulc_to_c (dict): mapping of LULC codes to C values + lulc_path (string): path to LULC raster + w_factor_path (string): path to outputed raw W factor + out_thresholded_w_factor_path (string): W factor from `w_factor_path` + thresholded to be no less than 0.001. + + Returns: + None + + """ + if pygeoprocessing.get_raster_info(lulc_path)['nodata'][0] is None: + # will get a case where the raster might be masked but nothing to + # replace so 0 is used by default. Ensure this exists in lookup. + if 0 not in lulc_to_c: + lulc_to_c = lulc_to_c.copy() + lulc_to_c[0] = 0.0 + + reclass_error_details = { + 'raster_name': 'LULC', 'column_name': 'lucode', + 'table_name': 'Biophysical'} + + utils.reclassify_raster( + (lulc_path, 1), lulc_to_c, w_factor_path, gdal.GDT_Float32, + _TARGET_NODATA, reclass_error_details) + + pygeoprocessing.raster_map( + op=lambda w_val: numpy.where(w_val < 0.001, 0.001, w_val), + rasters=[w_factor_path], + target_path=out_thresholded_w_factor_path) + + +def _calculate_cp(lulc_to_cp, lulc_path, cp_factor_path): + """Map LULC to C*P value. + + Args: + lulc_to_cp (dict): mapping of lulc codes to CP values + lulc_path (string): path to LULC raster + cp_factor_path (string): path to output raster of LULC mapped to C*P + values + + Returns: + None + + """ + if pygeoprocessing.get_raster_info(lulc_path)['nodata'][0] is None: + # will get a case where the raster might be masked but nothing to + # replace so 0 is used by default. Ensure this exists in lookup. + if 0 not in lulc_to_cp: + lulc_to_cp[0] = 0.0 + + reclass_error_details = { + 'raster_name': 'LULC', 'column_name': 'lucode', + 'table_name': 'Biophysical'} + + utils.reclassify_raster( + (lulc_path, 1), lulc_to_cp, cp_factor_path, gdal.GDT_Float32, + _TARGET_NODATA, reclass_error_details) + + +def _calculate_bar_factor( + flow_direction_path, factor_path, flow_accumulation_path, + accumulation_path, out_bar_path): + """Route user defined source across DEM. 
+ + Used for calculating S and W bar in the SDR operation. + + Args: + dem_path (string): path to DEM raster + factor_path (string): path to arbitrary factor raster + flow_accumulation_path (string): path to flow accumulation raster + flow_direction_path (string): path to flow direction path (in radians) + accumulation_path (string): path to a raster that can be used to + save the accumulation of the factor. Temporary file. + out_bar_path (string): path to output raster that is the result of + the factor accumulation raster divided by the flow accumulation + raster. + + Returns: + None. + + """ + LOGGER.debug("doing flow accumulation mfd on %s", factor_path) + # manually setting compression to DEFLATE because we got some LZW + # errors when testing with large data. + pygeoprocessing.routing.flow_accumulation_mfd( + (flow_direction_path, 1), accumulation_path, + weight_raster_path_band=(factor_path, 1), + raster_driver_creation_tuple=('GTIFF', [ + 'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=DEFLATE', + 'PREDICTOR=3'])) + + pygeoprocessing.raster_map( + op=lambda base_accum, flow_accum: base_accum / flow_accum, + rasters=[accumulation_path, flow_accumulation_path], + target_path=out_bar_path) + + +def _calculate_d_up( + w_bar_path, s_bar_path, flow_accumulation_path, out_d_up_path): + """Calculate w_bar * s_bar * sqrt(flow accumulation * cell area).""" + cell_area = abs( + pygeoprocessing.get_raster_info(w_bar_path)['pixel_size'][0])**2 + pygeoprocessing.raster_map( + op=lambda w_bar, s_bar, flow_accum: ( + w_bar * s_bar * numpy.sqrt(flow_accum * cell_area)), + rasters=[w_bar_path, s_bar_path, flow_accumulation_path], + target_path=out_d_up_path) + + +def _calculate_d_up_bare( + s_bar_path, flow_accumulation_path, out_d_up_bare_path): + """Calculate s_bar * sqrt(flow accumulation * cell area).""" + cell_area = abs( + pygeoprocessing.get_raster_info(s_bar_path)['pixel_size'][0])**2 + pygeoprocessing.raster_map( + op=lambda s_bar, flow_accum: ( + numpy.sqrt(flow_accum * 
cell_area) * s_bar), + rasters=[s_bar_path, flow_accumulation_path], + target_path=out_d_up_bare_path) + + +def _calculate_ic(d_up_path, d_dn_path, out_ic_factor_path): + """Calculate log10(d_up/d_dn).""" + # ic can be positive or negative, so float.min is a reasonable nodata value + d_dn_nodata = pygeoprocessing.get_raster_info(d_dn_path)['nodata'][0] + + def ic_op(d_up, d_dn): + """Calculate IC factor.""" + valid_mask = ( + ~pygeoprocessing.array_equals_nodata(d_up, _TARGET_NODATA) & + ~pygeoprocessing.array_equals_nodata(d_dn, d_dn_nodata) & (d_dn != 0) & + (d_up != 0)) + ic_array = numpy.empty(valid_mask.shape, dtype=numpy.float32) + ic_array[:] = _IC_NODATA + ic_array[valid_mask] = numpy.log10( + d_up[valid_mask] / d_dn[valid_mask]) + return ic_array + + pygeoprocessing.raster_calculator( + [(d_up_path, 1), (d_dn_path, 1)], ic_op, out_ic_factor_path, + gdal.GDT_Float32, _IC_NODATA) + + +def _calculate_sdr( + k_factor, ic_0, sdr_max, ic_path, stream_path, out_sdr_path): + """Derive SDR from k, ic0, ic; 0 on the stream and clamped to sdr_max.""" + def sdr_op(ic_factor, stream): + """Calculate SDR factor.""" + valid_mask = ( + ~pygeoprocessing.array_equals_nodata(ic_factor, _IC_NODATA) & (stream != 1)) + result = numpy.empty(valid_mask.shape, dtype=numpy.float32) + result[:] = _TARGET_NODATA + result[valid_mask] = ( + sdr_max / (1+numpy.exp((ic_0-ic_factor[valid_mask])/k_factor))) + result[stream == 1] = 0.0 + return result + + pygeoprocessing.raster_calculator( + [(ic_path, 1), (stream_path, 1)], sdr_op, out_sdr_path, + gdal.GDT_Float32, _TARGET_NODATA) + + +def _calculate_e_prime(usle_path, sdr_path, stream_path, target_e_prime): + """Calculate USLE * (1-SDR).""" + def e_prime_op(usle, sdr, streams): + """Wash that does not reach stream.""" + valid_mask = ( + ~pygeoprocessing.array_equals_nodata(usle, _TARGET_NODATA) & + ~pygeoprocessing.array_equals_nodata(sdr, _TARGET_NODATA)) + result = numpy.empty(valid_mask.shape, dtype=numpy.float32) + result[:] = 
_TARGET_NODATA + result[valid_mask] = usle[valid_mask] * (1-sdr[valid_mask]) + # set to 0 on streams, to prevent nodata propagating up/down slope + # in calculate_sediment_deposition. This makes sense intuitively: + # E'_i represents the sediment export from pixel i that does not + # reach a stream, which is 0 if pixel i is already in a stream. + result[streams == 1] = 0 + return result + + pygeoprocessing.raster_calculator( + [(usle_path, 1), (sdr_path, 1), (stream_path, 1)], e_prime_op, + target_e_prime, gdal.GDT_Float32, _TARGET_NODATA) + + +def _generate_report( + watersheds_path, usle_path, sed_export_path, + sed_deposition_path, avoided_export_path, avoided_erosion_path, + watershed_results_sdr_path): + """Create summary vector with totals for rasters. + + Args: + watersheds_path (string): The path to the watersheds vector. + usle_path (string): The path to the computed USLE raster. + sed_export_path (string): The path to the sediment export raster. + sed_deposition_path (string): The path to the sediment deposition + raster. + avoided_export_path (string): The path to the total retention raster. + avoided_erosion_path (string): The path to the avoided local + erosion raster. + watershed_results_sdr_path (string): The path to where the watersheds + vector will be created. This path must end in ``.shp`` as it will + be written as an ESRI Shapefile. 
+ + Returns: + ``None`` + """ + original_datasource = gdal.OpenEx(watersheds_path, gdal.OF_VECTOR) + if os.path.exists(watershed_results_sdr_path): + LOGGER.warning(f'overwriting results at {watershed_results_sdr_path}') + os.remove(watershed_results_sdr_path) + driver = gdal.GetDriverByName('ESRI Shapefile') + target_vector = driver.CreateCopy( + watershed_results_sdr_path, original_datasource) + + target_layer = target_vector.GetLayer() + target_layer.SyncToDisk() + + field_summaries = { + 'usle_tot': pygeoprocessing.zonal_statistics( + (usle_path, 1), watershed_results_sdr_path), + 'sed_export': pygeoprocessing.zonal_statistics( + (sed_export_path, 1), watershed_results_sdr_path), + 'sed_dep': pygeoprocessing.zonal_statistics( + (sed_deposition_path, 1), watershed_results_sdr_path), + 'avoid_exp': pygeoprocessing.zonal_statistics( + (avoided_export_path, 1), watershed_results_sdr_path), + 'avoid_eros': pygeoprocessing.zonal_statistics( + (avoided_erosion_path, 1), watershed_results_sdr_path), + } + + for field_name in field_summaries: + field_def = ogr.FieldDefn(field_name, ogr.OFTReal) + field_def.SetWidth(24) + field_def.SetPrecision(11) + target_layer.CreateField(field_def) + + target_layer.ResetReading() + for feature in target_layer: + feature_id = feature.GetFID() + for field_name in field_summaries: + feature.SetField( + field_name, + float(field_summaries[field_name][feature_id]['sum'])) + target_layer.SetFeature(feature) + target_vector = None + target_layer = None + + +@validation.invest_validator +def validate(args, limit_to=None): + """Validate args to ensure they conform to `execute`'s contract. + + Args: + args (dict): dictionary of key(str)/value pairs where keys and + values are specified in `execute` docstring. + limit_to (str): (optional) if not None indicates that validation + should only occur on the args[limit_to] value. 
The intent that + individual key validation could be significantly less expensive + than validating the entire `args` dictionary. + + Returns: + list of ([invalid key_a, invalid_keyb, ...], 'warning/error message') + tuples. Where an entry indicates that the invalid keys caused + the error message in the second part of the tuple. This should + be an empty list if validation succeeds. + + """ + return validation.validate( + args, MODEL_SPEC['args'], MODEL_SPEC['args_with_spatial_overlap']) From 329e56795d9b9bf174eb3bdb15bf9c8b3d11e4a6 Mon Sep 17 00:00:00 2001 From: Saleh Mamun Date: Fri, 15 Nov 2024 17:01:54 -0600 Subject: [PATCH 4/7] Added data preprocessing and running of both SDR and Pollination Model --- scripts/es_model_functions.py | 561 ++++++++++++++++++++++++++++++++++ 1 file changed, 561 insertions(+) create mode 100644 scripts/es_model_functions.py diff --git a/scripts/es_model_functions.py b/scripts/es_model_functions.py new file mode 100644 index 0000000..ecd5145 --- /dev/null +++ b/scripts/es_model_functions.py @@ -0,0 +1,561 @@ +import os +import itertools +import shutil +import numpy as np +import pandas as pd +import geopandas as gpd +import pygeoprocessing.geoprocessing as geo +import rasterio as rio +from rasterio.warp import calculate_default_transform, reproject, Resampling +import time +from osgeo import gdal +gdal.UseExceptions() +from scripts.pollination_shocks import calculate_crop_value_and_shock +import scripts.sdr_local as sdr_local +from scripts.pollination_biophysical_model import pollination_sufficiency +import csv +import hazelbean as hb +import seals_utils +import logging +# Setup logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') +logger = logging.getLogger() + +def es_options(p): + p.aoi_buffer_distance = 5000 # buffer distance around the aoi in meters + p.force_es_data_preparation=False # If True, it will force the data preparation step. Otherwise, it will skip if the files are already present. 
+ p.process_common_inputs_first_time = True + p.dem_resolution = 300 # Set the DEM resolution for the SDR model. + # Define where additional data is stored + p.common_input_folder = os.path.join(p.base_data_dir, 'seals', 'es_common_inputs') + +def es_models(p): + '''' + function to create directories for ecosystem services models. + ''' + pass + +def pollination_results(p): + pollination_biophysical(p) + pollination_economic(p) +def sdr_results(p): + logger.info("Preparing data for SDR model") + sdr_data_preparation(p) + logger.info("Running SDR model") + run_sdr(p) + logger.info("Calculating LPL shock") + lpl_shock(p) + +def slice_inputs(base_raster, source_raster_dict, aoi_vector, target_folder): + """ + Assumes that `source_raster_dict` will be keyed as new_name: orig_path + """ + base_lulc_info = geo.get_raster_info(base_raster) + + src_paths = [] + dst_paths = [] + dst_dict = {} + for k, v in source_raster_dict.items(): + if os.path.splitext(v)[1] in [".tif", ".bil", ".img"]: + src_paths.append(v) + dst_paths.append(os.path.join(target_folder, f'{k}.tif')) + dst_dict[k] = os.path.join(target_folder, f'{k}.tif') + + try: + raster_align_index = int([i for i, x in enumerate(src_paths) if x == source_raster_dict[f'dem_7755']][0]) + except: + raster_align_index = None + + geo.align_and_resize_raster_stack( + base_raster_path_list=src_paths, + target_raster_path_list=dst_paths, + resample_method_list=['near' for _ in src_paths], + target_pixel_size=base_lulc_info['pixel_size'], + bounding_box_mode="intersection", + base_vector_path_list=[aoi_vector], + raster_align_index=raster_align_index, + vector_mask_options={'mask_vector_path': aoi_vector}, + ) + return dst_dict + +def reproject_raster(raster_path, target_raster_path, dst_crs='EPSG:7755'): + ''' + This function reprojects the raster to EPSG:7755 or any given crs. + This helper function is used in data_preparation function. 
+ ''' + with rio.open(raster_path) as src: + transform, width, height = calculate_default_transform( + src.crs, dst_crs, src.width, src.height, *src.bounds) + kwargs = src.meta.copy() + kwargs.update({ + 'crs': dst_crs, + 'transform': transform, + 'width': width, + 'height': height, + "compress": "LZW" + }) + if src.crs != dst_crs: + with rio.open(target_raster_path, 'w', **kwargs) as dst: + for i in range(1, src.count + 1): + reproject( + source=rio.band(src, i), + destination=rio.band(dst, i), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=dst_crs, + resampling=Resampling.nearest) + else: + shutil.copy(raster_path, target_raster_path) + +def fix_watersheds(watersheds_path, aoi_path, aoi_label, unique_id_field, projected_crs, output_folder=None): + ''' + This function fixes the watersheds shapefile by: + 1) clipping the watershed file to aoi and then + 2) exploding multipart polygons and keeping the largest area polygon for each unique id. + TODO: The fix might delete relative large polygons if they are not the largest in the multipart polygon. Need to discuss this. + Do we implement a filter based on area? 
+ ''' + # watersheds_path = "C:/Users/salmamun/Files/base_data/seals/es_common_inputs/watersheds_level_7_valid.gpkg" + # aoi_path = "C:/Users/salmamun/Files/seals/projects/Tanzania/intermediate/project_aoi_gtap_r251/aoi_buffer/aoi_TZA_32736.gpkg" + # aoi_path = "C:/Users/salmamun/Files/seals/projects/Tanzania/intermediate/project_aoi_gtap_r251/aoi_TZA.gpkg" + # aoi_label = "TZA" + # unique_id_field = "SORT" + # projected_crs = 32736 + # output_folder = 'C:\\Users\\salmamun\\Files\\seals\\projects\\Tanzania\\intermediate\\es_models\\sdr_results\\es_clipped_inputs' + watersheds_gdf = gpd.read_file(watersheds_path) + + if output_folder==None: + output_folder = os.path.dirname(watersheds_path) + clipped_watershed = os.path.join(output_folder, os.path.basename(watersheds_path).replace(".gpkg", f"_{aoi_label}_clipped.gpkg")) + if not hb.path_exists(clipped_watershed): + aoi_gdf = gpd.read_file(aoi_path) + clipped_gdf = watersheds_gdf.clip(aoi_gdf, keep_geom_type=False) + # Change clipped_gdf crs to projected_crs + if clipped_gdf.crs==None: + clipped_gdf.crs = "EPSG:4326" + if clipped_gdf.crs != f"EPSG:{projected_crs}": + clipped_gdf = clipped_gdf.to_crs(projected_crs) + clipped_gdf.to_file(clipped_watershed, driver='GPKG') + else: + clipped_gdf = gpd.read_file(clipped_watershed) + + multipart_watershed = os.path.join(output_folder, os.path.basename(watersheds_path).replace(".gpkg", f"_{aoi_label}_multipart.gpkg")) + if not hb.path_exists(multipart_watershed): + multipart_gdf = clipped_gdf.explode(index_parts=True) + multipart_gdf = multipart_gdf.loc[multipart_gdf.geometry.geometry.type=='Polygon'] # Only keep Polygons + multipart_gdf["area"] = multipart_gdf['geometry'].area + multipart_gdf.to_file(multipart_watershed, driver='GPKG') + else: + multipart_gdf = gpd.read_file(multipart_watershed) + + fixed_watersheds = os.path.join(output_folder, os.path.basename(watersheds_path).replace(".gpkg", f"_{aoi_label}_fixed.gpkg")) + if not hb.path_exists(fixed_watersheds): + 
fixed_gdf = multipart_gdf.merge(multipart_gdf.groupby([unique_id_field])['area'].max().reset_index(), on=[unique_id_field, 'area'], how='right') + fixed_gdf.to_file(fixed_watersheds, driver='GPKG') + +def create_aoi_buffer(p): + ''' + This function creates a buffer around the aoi shapefile. + The buffer distance is defined in the parameters file. + ''' + aoi_gdf = gpd.read_file(p.aoi_path) + aoi_projected_gdf = aoi_gdf.to_crs(p.projected_crs) + aoi_projected_gdf.to_file(p.aoi_projected_path, driver='GPKG') + aoi_projected_gdf['geometry'] = aoi_projected_gdf.buffer(p.aoi_buffer_distance) + aoi_buffer_gdf = aoi_projected_gdf.to_crs(4326) # converting back to 4326 for saving the file. + aoi_buffer_gdf.to_file(p.aoi_buffer_path, driver='GPKG') + return p.aoi_buffer_path + +def sdr_data_preparation(p): + ''' + This function prepares the data for the ecosystem services models. + You need to run this function onece to prepare the data.''' + es_options(p) + p.es_clipped_input_folder = os.path.join(p.sdr_results_dir, 'es_clipped_inputs') + clipped_uprojected_raster_folder = os.path.join(p.es_clipped_input_folder, 'unprojected') + p.clipped_projected_raster_folder = os.path.join(p.es_clipped_input_folder, 'projected') + hb.create_directories([p.es_clipped_input_folder, clipped_uprojected_raster_folder, p.clipped_projected_raster_folder]) + logger.info(f"Clipping and projecting the rasters for the SDR model.") + source_raster_dict = {} + if p.scenario_definitions_path is not None: + p.scenarios_df = pd.read_csv(p.scenario_definitions_path) + + for index, row in p.scenarios_df.iterrows(): + seals_utils.assign_df_row_to_object_attributes(p, row) + seals_utils.set_derived_attributes(p) + + # Build a dict for the lulc labels + labels_dict = dict(zip(p.all_class_indices, p.all_class_labels)) + + # this acountcs for the fact that the way the correspondence is loaded is not necessarily in the numerical order + indices_to_labels_dict = dict(sorted(labels_dict.items())) + + for year 
in p.years: + if p.scenario_type == 'baseline': + seal_lulc_name = 'lulc_' + p.lulc_src_label + '_' + p.lulc_simplification_label + '_' + str(p.key_base_year) + '.tif' + seal_lulc_path = os.path.join(p.fine_processed_inputs_dir, "lulc", p.lulc_src_label, p.lulc_simplification_label, seal_lulc_name) + else: + seal_lulc_name = 'lulc_' + p.lulc_src_label + '_' + p.lulc_simplification_label + '_' + p.exogenous_label + '_' + p.climate_label + '_' + p.model_label + '_' + p.counterfactual_label + '_' + str(year) + '.tif' + seal_lulc_path = os.path.join(p.stitched_lulc_simplified_scenarios_dir, seal_lulc_name) + source_raster_dict[f"lulc_{p.scenario_type}_{year}"]= seal_lulc_path + + + # Add common input rasters + if p.process_common_inputs_first_time: + source_raster_dict[f"dem_con"]= os.path.join(p.common_input_folder, "global_dem_10as.tif") + source_raster_dict[f"k_factor"]= os.path.join(p.common_input_folder, "global_soil_erodibility.tif") + source_raster_dict[f"rainfall_erosivity"]= os.path.join(p.common_input_folder, "GlobalR_NoPol.tif") + + aoi_buffer_folder = os.path.join(os.path.dirname(p.aoi_path), "aoi_buffer") + hb.create_directories([aoi_buffer_folder]) + p.aoi_projected_path = os.path.join(aoi_buffer_folder, os.path.basename(p.aoi_path.replace(".gpkg", f"_{p.projected_crs}.gpkg"))) + p.aoi_buffer_path = os.path.join(aoi_buffer_folder, os.path.basename(p.aoi_path.replace(".gpkg", "_buffer.gpkg"))) + if not hb.path_exists(p.aoi_buffer_path) or not hb.path_exists(p.aoi_projected_path): + p.aoi_buffer_path = create_aoi_buffer(p) + + for k, v in source_raster_dict.items(): + single_dict = {k: v} + base_raster = v + unprojected_raster = os.path.join(clipped_uprojected_raster_folder, f'{k}.tif') + if not hb.path_exists(unprojected_raster) or p.force_es_data_preparation: + if hb.path_exists(base_raster): + slice_inputs(base_raster, single_dict, p.aoi_buffer_path, clipped_uprojected_raster_folder) + + projected_raster = os.path.join(p.clipped_projected_raster_folder, 
f'{k}_{p.projected_crs}.tif') + if not hb.path_exists(projected_raster) or p.force_es_data_preparation: + if hb.path_exists(unprojected_raster): + reproject_raster(unprojected_raster, projected_raster, dst_crs=f'EPSG:{p.projected_crs}') + + if p.process_common_inputs_first_time: + # Get other resolution of dem + dem_path = os.path.join(p.clipped_projected_raster_folder, f"dem_con_{p.projected_crs}.tif") + p.scaled_dem_path = dem_path.replace('.tif', f"_{p.dem_resolution}.tif") + if not hb.path_exists(p.scaled_dem_path) or p.force_es_data_preparation: + dem_resolution(dem_path, resolution=p.dem_resolution) + + # Fix watersheds + logger.info(f"Fixing watersheds") + p.watersheds_path = os.path.join(p.common_input_folder, "watersheds_level_7_valid.gpkg") + p.fixed_watersheds_path = os.path.join(p.es_clipped_input_folder, os.path.basename(p.watersheds_path).replace(".gpkg", f"_{p.aoi_label}_fixed.gpkg")) + if not hb.path_exists(p.fixed_watersheds_path): + if hb.path_exists(p.watersheds_path): + fix_watersheds(watersheds_path=p.watersheds_path, aoi_path=p.aoi_path, aoi_label=p.aoi_label, unique_id_field="SORT", + projected_crs = p.projected_crs, output_folder=p.es_clipped_input_folder) + # Copy biophysical table + logger.info(f"Copying biophysical table") + sdr_biophysical_table_source = os.path.join(p.common_input_folder, 'sdr_parameter_table_onil.csv') + p.sdr_biophyscal_table = os.path.join(p.es_clipped_input_folder, 'sdr_biophysical_table.csv') + if not hb.path_exists(p.sdr_biophyscal_table): + shutil.copy(sdr_biophysical_table_source, p.sdr_biophyscal_table) + + # reproject area raster to projected crs. Do this once. We will need this for LPL calculations. 
+ area_4326 = os.path.join(p.project_aoi_gtap_r251_dir, "pyramids", "aoi_ha_per_cell_fine.tif") + p.area_projected_path = os.path.join(p.clipped_projected_raster_folder, f"area_ha_per_cell_fine_{p.projected_crs}.tif") + if not hb.path_exists(p.area_projected_path): + if hb.path_exists(area_4326): + reproject_raster(area_4326, p.area_projected_path, dst_crs = f'EPSG:{p.projected_crs}') + +def dem_resolution(dem_path, resolution=300): + ''' + This function resamples the dem to the given resolution. + ''' + with rio.open(dem_path) as dataset: + scale_factor_x = dataset.res[0]/resolution + scale_factor_y = dataset.res[1]/resolution + + profile = dataset.profile.copy() + # resample data to target shape + data = dataset.read( + out_shape=( + dataset.count, + int(dataset.height * scale_factor_y), + int(dataset.width * scale_factor_x) + ), + resampling=Resampling.cubic_spline + ) + + # scale image transform + transform = dataset.transform * dataset.transform.scale( + (1 / scale_factor_x), + (1 / scale_factor_y) + ) + profile.update({"height": data.shape[-2], + "width": data.shape[-1], + "transform": transform}) + + target_dem_path = dem_path.replace('.tif', f"_{resolution}.tif") + with rio.open(target_dem_path, "w", **profile) as dataset: + dataset.write(data) + +def prepare_common_sdr_config(p): + sdr_args = {} + sdr_args["workspace_dir"] = os.path.join(p.sdr_results_dir, f"output_dem_{p.dem_resolution}") + # result_sufix will be set based on lulc scenario + + # The following are the raster needed for models + # lulc path is also set based on lulc scenario + sdr_args["dem_path"] = os.path.join(p.clipped_projected_raster_folder, f"dem_con_{p.projected_crs}_{p.dem_resolution}.tif") + sdr_args["precipitation_raster_path"] = os.path.join(p.clipped_projected_raster_folder, f"precipitation_annual_{p.projected_crs}.tif") + sdr_args["erodibility_path"] = os.path.join(p.clipped_projected_raster_folder, f"k_factor_{p.projected_crs}.tif") + sdr_args["erosivity_path"] = 
os.path.join(p.clipped_projected_raster_folder, f"rainfall_erosivity_{p.projected_crs}.tif") + + # The following are the vector needed for models + sdr_args["watersheds_path"] = p.fixed_watersheds_path + + # The following are the parameters tables + sdr_args["biophysical_table_path"] = p.sdr_biophyscal_table + + # The following are specific parameters for the models + sdr_args["threshold_flow_accumulation"] = 75 # Taken from Onil - 75 + sdr_args["k_param"] = 2 + sdr_args["sdr_max"] = 0.8 # DEFAULT_SDR_SDR_MAX + sdr_args["ic_0_param"] = 0.5 # DEFAULT_SDR_IC_0_PARAM + sdr_args["l_max"] = 122 # DEFAULT_SDR_L_MAX + sdr_args["drainage_path"] = '' # This parameter is optional + + # Resource management parameters + sdr_args["n_workers"] = p.num_workers # Provide the number of cores to use. + return sdr_args + +def run_sdr(p): + ''' + This function runs the SDR model with given args. + ''' + + sdr_args = prepare_common_sdr_config(p) + p.usle_results_path = {} + p.lulc_projected_path = {} + if p.scenario_definitions_path is not None: + p.scenarios_df = pd.read_csv(p.scenario_definitions_path) + + for index, row in p.scenarios_df.iterrows(): + seals_utils.assign_df_row_to_object_attributes(p, row) + seals_utils.set_derived_attributes(p) + + # Build a dict for the lulc labels + for year in p.years: + sdr_args["results_suffix"] = f"{p.scenario_type}_{year}" + sdr_args["lulc_path"] = os.path.join(p.clipped_projected_raster_folder, f"lulc_{p.scenario_type}_{year}_{p.projected_crs}.tif") + p.lulc_projected_path[f"lulc_{p.scenario_type}_{year}"] = sdr_args["lulc_path"] + # Run the model. Implemented a shortcut to avoid running the model if the results (usle) already exist. 
+ p.usle_results_path[f"usle_{p.scenario_type}_{year}"] = os.path.join(sdr_args["workspace_dir"], f"usle_{p.scenario_type}_{year}.tif") + if not hb.path_exists(p.usle_results_path[f"usle_{p.scenario_type}_{year}"]): + sdr_local.execute(sdr_args) + +def get_errosion_by_ha(aligned_rasters, output_folder, binary_threshold=False, **kwargs) -> dict: + ''' + This function calculates the sediment export by hectare. + ''' + if not os.path.exists(output_folder): + os.makedirs(output_folder) + se_nodata = geo.get_raster_info(aligned_rasters['usle'])['nodata'][0] + out_dict = {} + if binary_threshold: + key = 'usle_binary' + source_nodata = 8 + data_type = gdal.GDT_Byte + else: + key = 'usle_by_ha' + source_nodata = se_nodata + data_type = gdal.GDT_Float32 + + resolution = kwargs.get('resolution') + seal_scenario = kwargs.get('seal_scenario') + target_file = os.path.join(output_folder, f'{key}_{resolution}_{seal_scenario}.tif') + out_dict[key] = target_file + def _usle_by_ha(se, ar): + result = se.copy() + valid_mask = (se != se_nodata) & (ar != -9999) + result[valid_mask] = se[valid_mask]/ar[valid_mask] + if binary_threshold: + result[valid_mask] = np.where(result[valid_mask] >= binary_threshold, 1, 0) + result[~valid_mask] = source_nodata + return result + + geo.raster_calculator( + [(aligned_rasters['usle'], 1), (aligned_rasters['area'], 1)], + _usle_by_ha, + target_file, + data_type, + source_nodata + ) + return out_dict + +def get_lpl_shock(aligned_rasters, output_folder, resolution, seal_scenario, binary_threshold=11, lpl_factor=0.08) -> None: + ''' + This function calculates the sediment export by hectare. 
+ ''' + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + usle_binary = get_errosion_by_ha(aligned_rasters, output_folder, + resolution=resolution, seal_scenario=seal_scenario, + binary_threshold=binary_threshold) + errosion_arr = rio.open(usle_binary['usle_binary']).read(1).ravel() + area_arr = rio.open(aligned_rasters['area']).read(1).ravel() + lulc_arr = rio.open(aligned_rasters['lulc']).read(1).ravel() + + area_seb_ag = area_arr[np.all([errosion_arr == 1, lulc_arr==2], axis=0)].sum() # 2 is cropland + area_ag = area_arr[np.all([area_arr != -9.99900000e+03, lulc_arr==2], axis=0)].sum() + + lpl_shock = area_seb_ag*lpl_factor/area_ag + return lpl_shock + +def align_raster(base_raster: str, + source_raster_dict: dict, + aoi_vector: str, + target_folder: str) -> dict: + ''' + This function aligns the raster to the base raster. + ''' + if not os.path.exists(target_folder): + os.makedirs(target_folder) + + base_lulc_info = geo.get_raster_info(base_raster) + + src_paths = [] + dst_paths = [] + dst_dict = {} + for k, v in source_raster_dict.items(): + if os.path.splitext(v)[1] in [".tif", ".bil", ".img"]: + src_paths.append(v) + dst_paths.append(os.path.join(target_folder, f'{k}.tif')) + dst_dict[k] = os.path.join(target_folder, f'{k}.tif') + + + geo.align_and_resize_raster_stack( + base_raster_path_list=src_paths, + target_raster_path_list=dst_paths, + resample_method_list=['near' for _ in src_paths], + target_pixel_size=base_lulc_info['pixel_size'], + bounding_box_mode="intersection", + base_vector_path_list=[aoi_vector], + raster_align_index=0, + vector_mask_options={'mask_vector_path': aoi_vector} + ) + return dst_dict + +def lpl_shock(p): + p.lpl_shock_all = pd.DataFrame(columns=["scenario", "scenario_type","resolution", "year", "avoided erosion"]) + p.lpl_output_folder = os.path.join(p.sdr_results_dir, "shocks") + hb.create_directories([p.lpl_output_folder]) + p.lpl_shock_csv_path = os.path.join(p.sdr_results_dir, "lpl_shock.csv") + if 
p.scenario_definitions_path is not None: + p.scenarios_df = pd.read_csv(p.scenario_definitions_path) + + for index, row in p.scenarios_df.iterrows(): + seals_utils.assign_df_row_to_object_attributes(p, row) + seals_utils.set_derived_attributes(p) + for year in p.years: + align_raster_dict = { + "usle": p.usle_results_path[f"usle_{p.scenario_type}_{year}"], + "area": p.area_projected_path, + "lulc": p.lulc_projected_path[f"lulc_{p.scenario_type}_{year}"] + } + aligned_raster_folder = os.path.join(p.sdr_results_dir, "aligned_rasters", f"aligned_rasters_{p.scenario_type}_{year}_{p.dem_resolution}") + hb.create_directories([aligned_raster_folder]) + + aligned_rasters = {k: os.path.join(aligned_raster_folder, f"{k}.tif") for k, v in align_raster_dict.items()} + if not hb.path_all_exist(list(aligned_rasters.values())): + aligned_rasters = align_raster(align_raster_dict["usle"], + align_raster_dict, p.aoi_projected_path, aligned_raster_folder) + + # Calculate LPL shocks + get_errosion_by_ha(aligned_rasters, p.lpl_output_folder, resolution=p.dem_resolution, + seal_scenario=p.scenario_type, binary_threshold=False) + # The above is not neccessary for lpl_shocks. Just created those to see usle by ha. 
+ lpl_shock = get_lpl_shock(aligned_rasters, p.lpl_output_folder, resolution=p.dem_resolution, + seal_scenario=p.scenario_type, binary_threshold=11, lpl_factor=0.08) + p.lpl_shock_all.loc[len(p.lpl_shock_all)] = [p.scenario_label, p.scenario_type, p.dem_resolution, year, lpl_shock] + + p.lpl_shock_all.to_csv(p.lpl_shock_csv_path, index=False) + +def pollination_biophysical(p): + print("start") + if p.run_this: + p.pollination_sufficiency_baseline = {} + p.pollination_sufficiency_scenario = {} + for index, row in p.scenarios_df.iterrows(): + seals_utils.assign_df_row_to_object_attributes(p, row) + hb.log('Calculating pollination sufficiency on ' + str(index) + ' of ' + str(len(p.scenarios_df))) + if p.scenario_type == 'baseline': + + for year in p.base_years: + current_lulc_path = os.path.join(p.fine_processed_inputs_dir, 'lulc/esa/seals7','lulc_esa_seals7_' + str(p.base_years[0]) + '.tif') + current_lulc_bb = hb.get_bounding_box(current_lulc_path) + pollination_sufficiency_output_path = os.path.join(p.cur_dir, 'pollination_sufficiency_' + p.scenario_type + '_' + str(p.base_years[0]) + '.tif') + if not hb.path_exists(pollination_sufficiency_output_path): + hb.log('Running global_invest_main.make_poll_suff on LULC: ' + str(current_lulc_path) + ' and saving results to ' + str(pollination_sufficiency_output_path)) + # pollination_sufficiency(current_lulc_path, pollination_sufficiency_output_path) + pollination_sufficiency(current_lulc_path, pollination_sufficiency_output_path) + p.pollination_sufficiency_baseline[f"{p.scenario_type}_{year}"] = pollination_sufficiency_output_path # Saleh: Saving the path to the object for later use + elif p.scenario_type != 'baseline': + + for year in p.years: + current_lulc_path = os.path.join(p.stitched_lulc_simplified_scenarios_dir, + 'lulc_' + p.lulc_src_label + '_' + p.lulc_simplification_label + '_' + p.exogenous_label + + '_' + p.climate_label + '_' + p.model_label + '_' + p.counterfactual_label + '_' + str(year) + '.tif') + 
current_lulc_bb = hb.get_bounding_box(current_lulc_path) + pollination_sufficiency_output_path = os.path.join(p.cur_dir, 'pollination_sufficiency_' + p.scenario_label + '_' + str(p.years[0]) + '.tif') + if not hb.path_exists(pollination_sufficiency_output_path): + hb.log('Running global_invest_main.make_poll_suff on LULC: ' + str(current_lulc_path) + ' and saving results to ' + str(pollination_sufficiency_output_path)) + #pollination_sufficiency(current_lulc_path, pollination_sufficiency_output_path) + pollination_sufficiency(current_lulc_path, pollination_sufficiency_output_path) + p.pollination_sufficiency_scenario[f"{p.scenario_type}_{year}"] = pollination_sufficiency_output_path # Saleh: Saving the path to the object for later use + +def pollination_economic(p): + p.crop_data_dir = os.path.join(p.common_input_folder, "pollination/crop_production") + p.pollination_dependence_spreadsheet_input_path = os.path.join(p.common_input_folder, "pollination/rspb20141799supp3.xls") # Note had to fix pol.dep for coffee and green broadbean as it was 25 not .25 + output_dir = os.path.join(p.cur_dir) + p.pollination_shock_csv_path = os.path.join(p.cur_dir, 'pollination_shock.csv') + + # Initialize CSV file for shock values + if not os.path.exists(p.pollination_shock_csv_path): + with open(p.pollination_shock_csv_path, 'w', newline='') as shock_file: + writer = csv.writer(shock_file) + writer.writerow(['scenario', 'pollination supply', 'year', 'Baseline Path', 'Scenario Path']) + + if p.run_this: + for index, row in p.scenarios_df.iterrows(): + seals_utils.assign_df_row_to_object_attributes(p, row) + hb.log('Calculating pollination_economic on ' + str(index) + ' of ' + str(len(p.scenarios_df))) + if p.scenario_type == 'baseline': + for year in p.base_years: + lulc_baseline_path = os.path.join(p.fine_processed_inputs_dir, 'lulc/esa/seals7','lulc_esa_seals7_' + str(year) + '.tif') + pollination_sufficiency_baseline = p.pollination_sufficiency_baseline[f"{p.scenario_type}_{year}"] 
+ # os.path.join(os.path.dirname(p.cur_dir), 'pollination_biophysical','pollination_sufficiency_' + p.scenario_type + '_' + str(year) + '.tif') + p.base_year = year + elif p.scenario_type != 'baseline': + for year in p.years: + lulc_scenario_path = os.path.join(p.stitched_lulc_simplified_scenarios_dir, + 'lulc_' + p.lulc_src_label + '_' + p.lulc_simplification_label + '_' + p.exogenous_label + + '_' + p.climate_label + '_' + p.model_label + '_' + p.counterfactual_label + '_' + str(year) + '.tif') + pollination_sufficiency_scenario = p.pollination_sufficiency_scenario[f"{p.scenario_type}_{year}"] + # os.path.join(os.path.dirname(p.cur_dir), 'pollination_biophysical', 'pollination_sufficiency_' + p.scenario_label + '_' + str(year) + '.tif') + calculate_crop_value_and_shock( + lulc_baseline_path, lulc_scenario_path, pollination_sufficiency_baseline, pollination_sufficiency_scenario, + p.crop_data_dir, p.pollination_dependence_spreadsheet_input_path, output_dir, p.pollination_shock_csv_path, p.scenario_label, + year, p.base_year + ) + +def manage_input(p): + p.shock_output_dir = os.path.join(p.project_dir, "output") + hb.create_directories([p.shock_output_dir]) + # pollination_shock_df = pd.read_csv("C:/Users/salmamun/Files/seals/projects/India/intermediate/es_models/pollination_results/pollination_shock.csv") + pollination_shock_df = pd.read_csv(p.pollination_shock_csv_path) + # lpl_shock_df = pd.read_csv("C:/Users/salmamun/Files/seals/projects/India/intermediate/es_models/sdr_results/lpl_shock.csv") + lpl_shock_df = pd.read_csv(p.lpl_shock_csv_path) + manage_input_df = pd.DataFrame(columns=['reg', 'year', 'pollination supply', 'avoided erosion']) + for index, row in p.scenarios_df.iterrows(): + seals_utils.assign_df_row_to_object_attributes(p, row) + p.lpl_baseline = lpl_shock_df.loc[(lpl_shock_df['scenario_type'] == 'baseline'), 'avoided erosion'].values[0] + if p.scenario_type != 'baseline': + for year in p.years: + pollination_supply = 
pollination_shock_df.loc[(pollination_shock_df['scenario'] == p.scenario_label) & (pollination_shock_df['year'] == year), 'pollination supply'].values[0] + lpl_shock_scenario = lpl_shock_df.loc[(lpl_shock_df['scenario'] == p.scenario_label) & (lpl_shock_df['year'] == year), 'avoided erosion'].values[0] + avoided_erosion = 1- (lpl_shock_scenario - p.lpl_baseline) + manage_input_df.loc[len(manage_input_df)] = [p.aoi_label, year, pollination_supply, avoided_erosion] + + manage_input_df.to_csv(os.path.join(p.shock_output_dir, "shock_output.csv"), index=False) + shutil.rmtree(p.manage_input_dir, ignore_errors=True) From f119ffd94adb08a08850036e1ac2247524d7b89c Mon Sep 17 00:00:00 2001 From: Saleh Mamun Date: Fri, 15 Nov 2024 17:02:32 -0600 Subject: [PATCH 5/7] Added a version of project_aoi function that can take gtapv7_r251_label as column name when filtering AOI This version of the function makes disputed lands part of the project AOI. I also added a local function that is similar to hb.extract_features_in_shapefile_by_attribute but it can dissolve features if the length of features is greater than 1. Later we may consider adding this function directly to hb.
--- seals/seals_tasks.py | 83 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/seals/seals_tasks.py b/seals/seals_tasks.py index 751191b..12db526 100644 --- a/seals/seals_tasks.py +++ b/seals/seals_tasks.py @@ -69,3 +69,86 @@ def project_aoi(p): else: raise NameError('Unable to interpret p.aoi.') + +def project_aoi_gtap_r251(p): + + p.ha_per_cell_coarse_path = p.get_path(hb.ha_per_cell_ref_paths[p.coarse_resolution_arcseconds]) + p.ha_per_cell_fine_path = p.get_path(hb.ha_per_cell_ref_paths[p.fine_resolution_arcseconds]) + + # Process p.aoi to set the regional_vector, bb, bb_exact, and aoi_ha_per_cell_paths + if isinstance(p.aoi, str): + if p.aoi == 'global': + p.aoi_path = p.global_regions_vector_path + p.aoi_label = 'global' + p.bb_exact = hb.global_bounding_box + p.bb = p.bb_exact + + p.aoi_ha_per_cell_coarse_path = p.ha_per_cell_coarse_path + p.aoi_ha_per_cell_fine_path = p.ha_per_cell_fine_path + + elif isinstance(p.aoi, str): + if len(p.aoi) == 3: # Then it might be an ISO3 code. For now, assume so. + p.aoi_path = os.path.join(p.cur_dir, 'aoi_' + str(p.aoi) + '.gpkg') + p.aoi_label = p.aoi + else: # Then it's a path to a shapefile. + p.aoi_path = p.aoi + p.aoi_label = os.path.splitext(os.path.basename(p.aoi))[0] + + for current_aoi_path in hb.list_filtered_paths_nonrecursively(p.cur_dir, include_strings='aoi'): + if current_aoi_path != p.aoi_path: + raise NameError('There is more than one AOI in the current directory. This means you are trying to run a project in a new area of interst in a project that was already run in a different area of interest. This is not allowed! You probably want to create a new project directory and set the p = hb.ProjectFlow(...) line to point to the new directory.') + + if not hb.path_exists(p.aoi_path): + extract_features_in_shapefile_by_attribute_dissolve(p.global_regions_vector_path, p.aoi_path, 'gtapv7_r251_label', p.aoi.upper()) + # over here we are using a local function. 
It can be substitute by a function in the hb module with added if clause. + # hb.extract_features_in_shapefile_by_attribute(p.global_regions_vector_path, p.aoi_path, 'gtapv7_r251_label', p.aoi.upper()) + + p.bb_exact = hb.spatial_projection.get_bounding_box(p.aoi_path) + p.bb = hb.pyramids.get_pyramid_compatible_bb_from_vector_and_resolution(p.aoi_path, p.processing_resolution_arcseconds) + + + # Create a PROJECT-SPECIFIC version of these clipped ones. + p.aoi_ha_per_cell_fine_path = os.path.join(p.cur_dir, 'pyramids', 'aoi_ha_per_cell_fine.tif') + if not hb.path_exists(p.aoi_ha_per_cell_fine_path): + hb.create_directories(p.aoi_ha_per_cell_fine_path) + + # make ha_per_cell_paths not be a dict but a project level ha_per_cell_fine_path etc + hb.clip_raster_by_bb(p.ha_per_cell_fine_path, p.bb, p.aoi_ha_per_cell_fine_path) + + p.aoi_ha_per_cell_coarse_path = os.path.join(p.cur_dir, 'pyramids', 'aoi_ha_per_cell_coarse.tif') + if not hb.path_exists(p.aoi_ha_per_cell_coarse_path): + hb.create_directories(p.aoi_ha_per_cell_coarse_path) + hb.clip_raster_by_bb(p.ha_per_cell_coarse_path, p.bb, p.aoi_ha_per_cell_coarse_path) + + + + else: + p.bb_exact = hb.spatial_projection.get_bounding_box(p.aoi_path) + p.bb = hb.pyramids.get_pyramid_compatible_bb_from_vector_and_resolution(p.aoi_path, p.processing_resolution_arcseconds) + + # Create a PROJECT-SPECIFIC version of these clipped ones. 
+ p.aoi_ha_per_cell_fine_path = os.path.join(p.cur_dir, 'pyramids', 'aoi_ha_per_cell_fine.tif') + if not hb.path_exists(p.aoi_ha_per_cell_fine_path): + hb.create_directories(p.aoi_ha_per_cell_fine_path) + hb.clip_raster_by_bb(p.ha_per_cell_paths[p.fine_resolution_arcseconds], p.bb, p.aoi_ha_per_cell_fine_path) + + p.aoi_ha_per_cell_coarse_path = os.path.join(p.cur_dir, 'pyramids', 'aoi_ha_per_cell_coarse.tif') + if not hb.path_exists(p.aoi_ha_per_cell_coarse_path): + hb.create_directories(p.aoi_ha_per_cell_coarse_path) + hb.clip_raster_by_bb(p.ha_per_cell_paths[p.coarse_resolution_arcseconds], p.bb, p.aoi_ha_per_cell_coarse_path) + + else: + raise NameError('Unable to interpret p.aoi.') + +def extract_features_in_shapefile_by_attribute_dissolve(input_path, output_path, column_name, column_filter): + import geopandas as gpd + gdf = gpd.read_file(input_path) + # print(gdf) + gdf_out = gdf.loc[gdf[column_name] == column_filter] + + if len(gdf_out) == 0: + raise NameError('No features found in ' + str(input_path) + ' with ' + str(column_name) + ' == ' + str(column_filter)) + if len(gdf_out) > 1: + gdf_out = gdf_out.dissolve(by=column_name) + hb.create_directories(output_path) + gdf_out.to_file(output_path) \ No newline at end of file From fd6114a6777b8ce798b281f8dae19a2bc00decc7 Mon Sep 17 00:00:00 2001 From: Saleh Mamun Date: Fri, 15 Nov 2024 17:03:10 -0600 Subject: [PATCH 6/7] wrote a separate function for building task tree where ecosystem service and economic implications are added in the pipeline --- seals/seals_initialize_project.py | 44 ++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/seals/seals_initialize_project.py b/seals/seals_initialize_project.py index 5250bb2..150da94 100644 --- a/seals/seals_initialize_project.py +++ b/seals/seals_initialize_project.py @@ -252,4 +252,46 @@ def build_custom_coarse_algorithm_task_tree(p): ##### VIZUALIZE EXISTING DATA ##### p.visualization_task = 
p.add_task(seals_visualization_tasks.visualization) - p.lulc_pngs_task = p.add_task(seals_visualization_tasks.lulc_pngs, parent=p.visualization_task) \ No newline at end of file + p.lulc_pngs_task = p.add_task(seals_visualization_tasks.lulc_pngs, parent=p.visualization_task) + +def build_manage_run_task_tree(p): + + # Define the project AOI + p.project_aoi_task = p.add_task(seals_tasks.project_aoi_gtap_r251) + + ##### FINE PROCESSED INPUTS ##### + p.fine_processed_inputs_task = p.add_task(seals_generate_base_data.fine_processed_inputs) + p.generated_kernels_task = p.add_task(seals_generate_base_data.generated_kernels, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_clip_task = p.add_task(seals_generate_base_data.lulc_clip, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_simplifications_task = p.add_task(seals_generate_base_data.lulc_simplifications, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_binaries_task = p.add_task(seals_generate_base_data.lulc_binaries, parent=p.fine_processed_inputs_task, creates_dir=False) + p.lulc_convolutions_task = p.add_task(seals_generate_base_data.lulc_convolutions, parent=p.fine_processed_inputs_task, creates_dir=False) + + ##### COARSE CHANGE ##### + p.coarse_change_task = p.add_task(seals_process_coarse_timeseries.coarse_change, skip_existing=0) + p.extraction_task = p.add_task(seals_process_coarse_timeseries.coarse_extraction, parent=p.coarse_change_task, run=1, skip_existing=0) + p.coarse_simplified_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_proportion, parent=p.coarse_change_task, skip_existing=0) + p.coarse_simplified_ha_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha, parent=p.coarse_change_task, skip_existing=0) + p.coarse_simplified_ha_difference_from_previous_year_task = p.add_task(seals_process_coarse_timeseries.coarse_simplified_ha_difference_from_previous_year, parent=p.coarse_change_task, skip_existing=0) + + ##### 
REGIONAL + p.regional_change_task = p.add_task(seals_process_coarse_timeseries.regional_change) + + ##### ALLOCATION ##### + p.allocations_task = p.add_iterator(seals_main.allocations) + p.allocation_zones_task = p.add_iterator(seals_main.allocation_zones, run_in_parallel=p.run_in_parallel, parent=p.allocations_task) + p.allocation_task = p.add_task(seals_main.allocation, parent=p.allocation_zones_task, skip_existing=1) + + ##### STITCH ZONES ##### + p.stitched_lulc_simplified_scenarios_task = p.add_task(seals_main.stitched_lulc_simplified_scenarios) + + ##### VIZUALIZE EXISTING DATA ##### + p.visualization_task = p.add_task(seals_visualization_tasks.visualization) + p.lulc_pngs_task = p.add_task(seals_visualization_tasks.lulc_pngs, parent=p.visualization_task) + + ##### RUN ES MODELS AND CALCULATE SHOCKS ##### + import scripts.es_model_functions as es_model_functions + p.es_model_task = p.add_task(es_model_functions.es_models) + p.sdr_model_task = p.add_task(es_model_functions.sdr_results, parent=p.es_model_task) + p.pollination_model_task = p.add_task(es_model_functions.pollination_results, parent=p.es_model_task) + p.manage_input_task = p.add_task(es_model_functions.manage_input, parent=p.es_model_task) \ No newline at end of file From e6fa7efe4e4bac03ea0ed295d79d0f3e70dc21b1 Mon Sep 17 00:00:00 2001 From: Saleh Mamun Date: Fri, 15 Nov 2024 17:03:39 -0600 Subject: [PATCH 7/7] added a separate test run for MANAGE This test run is separated from standard seals test run. It take projected CRS as input. 
Projected CRS is critical for running InVEST models --- seals/run_manage_standard.py | 75 ++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 seals/run_manage_standard.py diff --git a/seals/run_manage_standard.py b/seals/run_manage_standard.py new file mode 100644 index 0000000..960c5b5 --- /dev/null +++ b/seals/run_manage_standard.py @@ -0,0 +1,75 @@ +import os, sys +import seals_utils +import seals_initialize_project +import hazelbean as hb +import pandas as pd +import time +main = '' +if __name__ == '__main__': + start_time = time.time() + ### ------- ENVIRONMENT SETTINGS ------------------------------- + # Users should only need to edit lines in this section + + # Create a ProjectFlow Object to organize directories and enable parallel processing. + p = hb.ProjectFlow() + + # Assign project-level attributes to the p object (such as in p.base_data_dir = ... below) + # including where the project_dir and base_data are located. + # The project_name is used to name the project directory below. If the directory exists, each task will not recreate + # files that already exist. + p.user_dir = os.path.expanduser('~') + p.extra_dirs = ['Files', 'seals', 'projects'] + p.project_name = 'Bangladesh' + p.projected_crs = 32645 # crs for the project. It should be changed based on location of the country. It is required for InVEST models. We are using 7755 for IND, 32736 for TZA, 32645 for BGD + # p.project_name = p.project_name + '_' + hb.pretty_time() # If you don't want to recreate everything each time, comment out this line. + + # Based on the paths above, set the project_dir. All files will be created in this directory. + p.project_dir = os.path.join(p.user_dir, os.sep.join(p.extra_dirs), p.project_name) + p.set_project_dir(p.project_dir) + + p.run_in_parallel = 1 # Must be set before building the task tree if the task tree has parallel iterator tasks. + + # Build the task tree via a building function and assign it to p. 
IF YOU WANT TO LOOK AT THE MODEL LOGIC, INSPECT THIS FUNCTION + # seals_initialize_project.build_standard_run_task_tree(p) + seals_initialize_project.build_manage_run_task_tree(p) + + # Set the base data dir. The model will check here to see if it has everything it needs to run. + # If anything is missing, it will download it. You can use the same base_data dir across multiple projects. + # Additionally, if you're clever, you can move files generated in your tasks to the right base_data_dir + # directory so that they are available for future projects and avoid redundant processing. + # The final directory has to be named base_data to match the naming convention on the google cloud bucket. + p.base_data_dir = os.path.join(p.user_dir, 'Files/base_data') + + # ProjectFlow downloads all files automatically via the p.get_path() function. If you want it to download from a different + # bucket than default, provide the name and credentials here. Otherwise uses default public data 'gtap_invest_seals_2023_04_21'. + p.data_credentials_path = None + p.input_bucket_name = None + + ## Set defaults and generate the scenario_definitions.csv if it doesn't exist. + # SEALS will run based on the scenarios defined in a scenario_definitions.csv + # If you have not run SEALS before, SEALS will generate it in your project's input_dir. + # A useful way to get started is to run SEALS on the test data without modification + # and then edit the scenario_definitions.csv to your project needs. + p.scenario_definitions_filename = 'test_standard_scenarios.csv' + p.scenario_definitions_path = os.path.join(p.input_dir, p.scenario_definitions_filename) + seals_initialize_project.initialize_scenario_definitions(p) + + # SEALS is based on an extremely comprehensive region classification system defined in the following geopackage. 
+ global_regions_vector_ref_path = os.path.join('cartographic', 'ee', 'ee_r264_correspondence.gpkg') + p.global_regions_vector_path = p.get_path(global_regions_vector_ref_path) + + # Set processing resolution: determines how large of a chunk should be processed at a time. 4 deg is about max for 64gb memory systems + p.processing_resolution = 1.0 # In degrees. Must be in pyramid_compatible_resolutions + + seals_initialize_project.set_advanced_options(p) + + p.L = hb.get_logger('test_run_seals') + hb.log('Created ProjectFlow object at ' + p.project_dir + '\n from script ' + p.calling_script + '\n with base_data set at ' + p.base_data_dir) + + p.execute() + + result = 'Done!' + + print(f"It took {time.time() - start_time} seconds to run the script for {p.aoi_label}.") + +